#Partisan Stability During Turbulent Times: The American Panel Survey (TAPS) data analysis
#Donald P. Green and Paul B. Platzman
#July 2021

#NOTE TO USER: always run all code from the start of the document until line 368, which reads: "####Computing number of surviving complete cases." This reads in and processes the data in preparation for analysis. After line 368, any code chunks separated by four hashtags can be run on their own.

#Data for this script can be found at the following URL: https://wc.wustl.edu/taps-data-archive. After the data is downloaded, it can be imported in the following manner:

####Loading data
library("foreign")
library("haven")
taps_2012 = read.dta("./TAPSdata2012.dta")
taps_2013 = read.dta("./TAPSdata2013.dta")
taps_2014 = read_dta("./2014release.dta")
taps_2015 = read.dta("./TAPSdata2015.dta")
taps_2016 = read_dta("./2016.dta")
taps_2017 = read_dta("./2017.dta")
taps_2018 = read_dta("./2018.dta")



####Defining functions
income = function(x){
  if(is.na(x)){
    return(NA)
  }
  else if(x=="below $10,000"){
    return(as.numeric(5000))
  }
  else if(x=="$10,000 to $19,999"){
    return(as.numeric(15000))
  }
  else if(x=="$20,000 to $29,999"){
    return(as.numeric(25000))
  }
  else if(x=="$30,000 to $39,999"){
    return(as.numeric(35000))
  }
  else if(x=="$40,000 to $49,999"){
    return(as.numeric(45000))
  }
  else if(x=="$50,000 to $59,999"){
    return(as.numeric(55000))
  }
  else if(x=="$60,000 to $69,999"){
    return(as.numeric(65000))
  }
  else if(x=="$70,000 to $79,999"){
    return(as.numeric(75000))
  }
  else if(x=="$80,000 to $89,999"){
    return(as.numeric(85000))
  }
  else if(x=="$90,000 to $99,999"){
    return(as.numeric(95000))
  }
  else if(x=="$100,000 to $124,999"){
    return(as.numeric(112500))
  }
  else if(x=="$125,000 to $149,999"){
    return(as.numeric(137500))
  }
  else if(x=="$150,000 to $199,999"){
    return(as.numeric(175000))
  }
  else if(x=="$200,000 to $249,999"){
    return(as.numeric(225000))
  }
  else if(x=="$250,000 to $299,999"){
    return(as.numeric(275000))
  }
  else if(x=="$300,000 or more"){
    return(as.numeric(350000))
  }
  else{
    return(NA)
  }
}

wiley_r_squared = function(df,first_wave){
  #Defining relative waves
  y0 = df[,first_wave-1]
  y1 = df[,first_wave]
  y2 = df[,first_wave+1]
  y3 = df[,first_wave+2]
  
  #Computing components of R^2
  cov_y1_y2_squared = cov(y1,y2)^2
  var_y1 = var(y1)
  var_y2 = var(y2)
  var_epsilon1 = var_y1 - (cov(y1,y2)*(cov(y0,y1)/cov(y0,y2)))
  var_epsilon2 = var_y2 - (cov(y2,y3)*(cov(y1,y2)/cov(y1,y3)))
  
  #Computing implied R^2
  r_squared_12 = cov_y1_y2_squared/((var_y1-var_epsilon1)*(var_y2-var_epsilon2))
  
  return(r_squared_12)
}

wiley_r_squared_first_wave_anchored = function(df,first_wave,increment){
  #Defining relative waves
  y0 = df[,first_wave-1]
  y1 = df[,first_wave]
  y2 = df[,first_wave+1+increment]
  y3 = df[,first_wave+2+increment]
  
  #Computing components of R^2
  cov_y1_y2_squared = cov(y1,y2)^2
  var_y1 = var(y1)
  var_y2 = var(y2)
  var_epsilon1 = var_y1 - (cov(y1,y2)*(cov(y0,y1)/cov(y0,y2)))
  var_epsilon2 = var_y2 - (cov(y2,y3)*(cov(y1,y2)/cov(y1,y3)))
  #Computing implied R^2
  r_squared_12 = cov_y1_y2_squared/((var_y1-var_epsilon1)*(var_y2-var_epsilon2))
  
  return(r_squared_12)
}

macroP = function(x){
  dem=0
  total=0
  for( i in x){
    if(i==-1){
      dem = dem+1
      total = total+1
    }
    else if (i==1){
      total = total+1
    }
  }
  return(dem/total)
}

library("Hmisc")
wiley_r_squared_first_wave_anchored_weighted = function(df,first_wave,increment,weight){
  #Defining relative waves
  y0 = first_wave-1
  y1 = first_wave
  y2 = first_wave+1+increment
  y3 = first_wave+2+increment
  
  #Computing components of R^2
  cov_y1_y2_squared_pre = cov.wt(df[,c(y1,y2)],wt=weight)
  cov_y1_y2_squared = cov_y1_y2_squared_pre$cov[2]^2
  var_y1 = wtd.var(df[,y1],weights=weight)
  var_y2 = wtd.var(df[,y2],weights=weight)
  cov_y1_y2_pre = cov.wt(df[,c(y1,y2)],wt=weight)
  cov_y1_y2 = cov_y1_y2_pre$cov[2]
  cov_y0_y1_pre = cov.wt(df[,c(y0,y1)],wt=weight)
  cov_y0_y1 = cov_y0_y1_pre$cov[2]
  cov_y0_y2_pre = cov.wt(df[,c(y0,y2)],wt=weight)
  cov_y0_y2 = cov_y0_y2_pre$cov[2]
  cov_y1_y3_pre = cov.wt(df[,c(y1,y3)],wt=weight)
  cov_y1_y3 = cov_y1_y3_pre$cov[2]
  cov_y2_y3_pre = cov.wt(df[,c(y2,y3)],wt=weight)
  cov_y2_y3 = cov_y2_y3_pre$cov[2]
  var_epsilon1 = var_y1 - (cov_y1_y2*(cov_y0_y1/cov_y0_y2))
  var_epsilon2 = var_y2 - (cov_y2_y3*(cov_y1_y2/cov_y1_y3))
  #Computing implied R^2
  r_squared_12 = cov_y1_y2_squared/((var_y1-var_epsilon1)*(var_y2-var_epsilon2))
  
  return(r_squared_12)
}



####Renaming columns
names(taps_2014)[names(taps_2014) == 'wustlid'] <- 'WUSTLID'
names(taps_2016)[names(taps_2016) == 'wustlid'] <- 'WUSTLID'



####Defining PID columns
##S1
taps_2012$S1_PID3 = ifelse(taps_2012$PARTYID1S1=="Democrat",-1,ifelse(taps_2012$PARTYID1S1=="Republican",1,ifelse(taps_2012$PARTYID1S1=="Independent",0,NA)))
table(taps_2012$PARTYID1S1,taps_2012$S1_PID3)
taps_2012$S1_PID7 = ifelse(taps_2012$PARTYID1S1=="Democrat" & taps_2012$PARTYID2S1=="Strong",-3,ifelse(taps_2012$PARTYID1S1=="Democrat"& taps_2012$PARTYID2S1=="Not Very Strong",-2,ifelse(taps_2012$PARTYID1S1=="Republican"& taps_2012$PARTYID2S1=="Strong",3,ifelse(taps_2012$PARTYID1S1=="Republican"& taps_2012$PARTYID2S1=="Not Very Strong",2,ifelse((taps_2012$PARTYID1S1=="Independent" & taps_2012$PARTYID3S1=="Democratic Party")|(taps_2012$PARTYID1S1=="Other, please specify:" & taps_2012$PARTYID4S1=="Democrats")|(taps_2012$PARTYID1S1=="Refused" & taps_2012$PARTYID4S1=="Democrats"),-1,ifelse((taps_2012$PARTYID1S1=="Independent" & taps_2012$PARTYID3S1=="Republican Party")|(taps_2012$PARTYID1S1=="Other, please specify:" & taps_2012$PARTYID4S1=="Republicans")|(taps_2012$PARTYID1S1=="Refused" & taps_2012$PARTYID4S1=="Republicans"),1,ifelse(taps_2012$PARTYID1S1=="Independent" & taps_2012$PARTYID3S1=="Refused",0,NA)))))))
table(taps_2012$PARTYID1S1,taps_2012$S1_PID7)
sum(!is.na(taps_2012$S1_PID3))
sum(!is.na(taps_2012$S1_PID7))

##S5
taps_2012$S5_PID3 = ifelse(taps_2012$PARTYID1S5=="Democrat",-1,ifelse(taps_2012$PARTYID1S5=="Republican",1,ifelse(taps_2012$PARTYID1S5=="Independent",0,NA)))
table(taps_2012$PARTYID1S5,taps_2012$S5_PID3)
taps_2012$S5_PID7 = ifelse(taps_2012$PARTYID1S5=="Democrat" & taps_2012$PARTYID2S5=="Strong",-3,ifelse(taps_2012$PARTYID1S5=="Democrat"& taps_2012$PARTYID2S5=="Not Very Strong",-2,ifelse(taps_2012$PARTYID1S5=="Republican"& taps_2012$PARTYID2S5=="Strong",3,ifelse(taps_2012$PARTYID1S5=="Republican"& taps_2012$PARTYID2S5=="Not Very Strong",2,ifelse((taps_2012$PARTYID1S5=="Independent" & taps_2012$PARTYID3S5=="Democratic Party")|(taps_2012$PARTYID1S5=="Other, please specify:" & taps_2012$PARTYID4S5=="Democrats")|(taps_2012$PARTYID1S5=="Refused" & taps_2012$PARTYID4S5=="Democrats"),-1,ifelse((taps_2012$PARTYID1S5=="Independent" & taps_2012$PARTYID3S5=="Republican Party")|(taps_2012$PARTYID1S5=="Other, please specify:" & taps_2012$PARTYID4S5=="Republicans")|(taps_2012$PARTYID1S5=="Refused" & taps_2012$PARTYID4S5=="Republicans"),1,ifelse(taps_2012$PARTYID1S5=="Independent" & taps_2012$PARTYID3S5=="Refused",0,NA)))))))
table(taps_2012$PARTYID1S5,taps_2012$S5_PID7)
sum(!is.na(taps_2012$S5_PID3))
sum(!is.na(taps_2012$S5_PID7))

##S7
taps_2012$S7_PID3 = ifelse(taps_2012$PARTYID1S7=="Democrat",-1,ifelse(taps_2012$PARTYID1S7=="Republican",1,ifelse(taps_2012$PARTYID1S7=="Independent",0,NA)))
table(taps_2012$PARTYID1S7,taps_2012$S7_PID3)
taps_2012$S7_PID7 = ifelse(taps_2012$PARTYID1S7=="Democrat" & taps_2012$PARTYID2S7=="Strong",-3,ifelse(taps_2012$PARTYID1S7=="Democrat"& taps_2012$PARTYID2S7=="Not Very Strong",-2,ifelse(taps_2012$PARTYID1S7=="Republican"& taps_2012$PARTYID2S7=="Strong",3,ifelse(taps_2012$PARTYID1S7=="Republican"& taps_2012$PARTYID2S7=="Not Very Strong",2,ifelse((taps_2012$PARTYID1S7=="Independent" & taps_2012$PARTYID3S7=="Democratic Party")|(taps_2012$PARTYID1S7=="Other, please specify:" & taps_2012$PARTYID4S7=="Democrats")|(taps_2012$PARTYID1S7=="Refused" & taps_2012$PARTYID4S7=="Democrats"),-1,ifelse((taps_2012$PARTYID1S7=="Independent" & taps_2012$PARTYID3S7=="Republican Party")|(taps_2012$PARTYID1S7=="Other, please specify:" & taps_2012$PARTYID4S7=="Republicans")|(taps_2012$PARTYID1S7=="Refused" & taps_2012$PARTYID4S7=="Republicans"),1,ifelse(taps_2012$PARTYID1S7=="Independent" & taps_2012$PARTYID3S7=="Refused",0,NA)))))))
table(taps_2012$PARTYID1S7,taps_2012$S7_PID7)
sum(!is.na(taps_2012$S7_PID3))
sum(!is.na(taps_2012$S7_PID7))

##S10
taps_2012$S10_PID3 = ifelse(taps_2012$PARTYID1S10=="Democrat",-1,ifelse(taps_2012$PARTYID1S10=="Republican",1,ifelse(taps_2012$PARTYID1S10=="Independent",0,NA)))
table(taps_2012$PARTYID1S10,taps_2012$S10_PID3)
taps_2012$S10_PID7 = ifelse(taps_2012$PARTYID1S10=="Democrat" & taps_2012$PARTYID2S10=="Strong",-3,ifelse(taps_2012$PARTYID1S10=="Democrat"& taps_2012$PARTYID2S10=="Not Very Strong",-2,ifelse(taps_2012$PARTYID1S10=="Republican"& taps_2012$PARTYID2S10=="Strong",3,ifelse(taps_2012$PARTYID1S10=="Republican"& taps_2012$PARTYID2S10=="Not Very Strong",2,ifelse((taps_2012$PARTYID1S10=="Independent" & taps_2012$PARTYID3S10=="Democratic Party")|(taps_2012$PARTYID1S10=="Other, please specify:" & taps_2012$PARTYID4S10=="Democrats")|(taps_2012$PARTYID1S10=="Refused" & taps_2012$PARTYID4S10=="Democrats"),-1,ifelse((taps_2012$PARTYID1S10=="Independent" & taps_2012$PARTYID3S10=="Republican Party")|(taps_2012$PARTYID1S10=="Other, please specify:" & taps_2012$PARTYID4S10=="Republicans")|(taps_2012$PARTYID1S10=="Refused" & taps_2012$PARTYID4S10=="Republicans"),1,ifelse(taps_2012$PARTYID1S10=="Independent" & taps_2012$PARTYID3S10=="Refused",0,NA)))))))
table(taps_2012$PARTYID1S10,taps_2012$S10_PID7)
sum(!is.na(taps_2012$S10_PID3))
sum(!is.na(taps_2012$S10_PID7))

##S11
taps_2012$S11_PID3 = ifelse(taps_2012$PARTYID1S11=="Democrat",-1,ifelse(taps_2012$PARTYID1S11=="Republican",1,ifelse(taps_2012$PARTYID1S11=="Independent",0,NA)))
table(taps_2012$PARTYID1S11,taps_2012$S11_PID3)
taps_2012$S11_PID7 = ifelse(taps_2012$PARTYID1S11=="Democrat" & taps_2012$PARTYID2S11=="Strong",-3,ifelse(taps_2012$PARTYID1S11=="Democrat"& taps_2012$PARTYID2S11=="Not Very Strong",-2,ifelse(taps_2012$PARTYID1S11=="Republican"& taps_2012$PARTYID2S11=="Strong",3,ifelse(taps_2012$PARTYID1S11=="Republican"& taps_2012$PARTYID2S11=="Not Very Strong",2,ifelse((taps_2012$PARTYID1S11=="Independent" & taps_2012$PARTYID3S11=="Democratic Party")|(taps_2012$PARTYID1S11=="Other, please specify:" & taps_2012$PARTYID4S11=="Democrats")|(taps_2012$PARTYID1S11=="Refused" & taps_2012$PARTYID4S11=="Democrats"),-1,ifelse((taps_2012$PARTYID1S11=="Independent" & taps_2012$PARTYID3S11=="Republican Party")|(taps_2012$PARTYID1S11=="Other, please specify:" & taps_2012$PARTYID4S11=="Republicans")|(taps_2012$PARTYID1S11=="Refused" & taps_2012$PARTYID4S11=="Republicans"),1,ifelse(taps_2012$PARTYID1S11=="Independent" & taps_2012$PARTYID3S11=="Refused",0,NA)))))))
table(taps_2012$PARTYID1S11,taps_2012$S11_PID7)
sum(!is.na(taps_2012$S11_PID3))
sum(!is.na(taps_2012$S11_PID7))

##S12
taps_2012$S12_PID3 = ifelse(taps_2012$PARTYID1S12=="Democrat",-1,ifelse(taps_2012$PARTYID1S12=="Republican",1,ifelse(taps_2012$PARTYID1S12=="Independent",0,NA)))
table(taps_2012$PARTYID1S12,taps_2012$S12_PID3)
taps_2012$S12_PID7 = ifelse(taps_2012$PARTYID1S12=="Democrat" & taps_2012$PARTYID2S12=="Strong",-3,ifelse(taps_2012$PARTYID1S12=="Democrat"& taps_2012$PARTYID2S12=="Not Very Strong",-2,ifelse(taps_2012$PARTYID1S12=="Republican"& taps_2012$PARTYID2S12=="Strong",3,ifelse(taps_2012$PARTYID1S12=="Republican"& taps_2012$PARTYID2S12=="Not Very Strong",2,ifelse((taps_2012$PARTYID1S12=="Independent" & taps_2012$PARTYID3S12=="Democratic Party")|(taps_2012$PARTYID1S12=="Other, please specify:" & taps_2012$PARTYID4S12=="Democrats")|(taps_2012$PARTYID1S12=="Refused" & taps_2012$PARTYID4S12=="Democrats"),-1,ifelse((taps_2012$PARTYID1S12=="Independent" & taps_2012$PARTYID3S12=="Republican Party")|(taps_2012$PARTYID1S12=="Other, please specify:" & taps_2012$PARTYID4S12=="Republicans")|(taps_2012$PARTYID1S12=="Refused" & taps_2012$PARTYID4S12=="Republicans"),1,ifelse(taps_2012$PARTYID1S12=="Independent" & taps_2012$PARTYID3S12=="Refused",0,NA)))))))
table(taps_2012$PARTYID1S12,taps_2012$S12_PID7)
sum(!is.na(taps_2012$S12_PID3))
sum(!is.na(taps_2012$S12_PID7))

##S17
taps_2013$S17_PID3 = ifelse(taps_2013$PARTYID1S17=="Democrat",-1,ifelse(taps_2013$PARTYID1S17=="Republican",1,ifelse(taps_2013$PARTYID1S17=="Independent",0,NA)))
table(taps_2013$PARTYID1S17,taps_2013$S17_PID3)
taps_2013$S17_PID7 = ifelse(taps_2013$PARTYID1S17=="Democrat" & taps_2013$PARTYID2S17=="Strong",-3,ifelse(taps_2013$PARTYID1S17=="Democrat"& taps_2013$PARTYID2S17=="Not Very Strong",-2,ifelse(taps_2013$PARTYID1S17=="Republican"& taps_2013$PARTYID2S17=="Strong",3,ifelse(taps_2013$PARTYID1S17=="Republican"& taps_2013$PARTYID2S17=="Not Very Strong",2,ifelse((taps_2013$PARTYID1S17=="Independent" & taps_2013$PARTYID3S17=="Democratic Party")|(taps_2013$PARTYID1S17=="Other, please specify:" & taps_2013$PARTYID4S17=="Democrats")|(taps_2013$PARTYID1S17=="Refused" & taps_2013$PARTYID4S17=="Democrats"),-1,ifelse((taps_2013$PARTYID1S17=="Independent" & taps_2013$PARTYID3S17=="Republican Party")|(taps_2013$PARTYID1S17=="Other, please specify:" & taps_2013$PARTYID4S17=="Republicans")|(taps_2013$PARTYID1S17=="Refused" & taps_2013$PARTYID4S17=="Republicans"),1,ifelse(taps_2013$PARTYID1S17=="Independent" & taps_2013$PARTYID3S17=="Refused",0,NA)))))))
table(taps_2013$PARTYID1S17,taps_2013$S17_PID7)
sum(!is.na(taps_2013$S17_PID3))
sum(!is.na(taps_2013$S17_PID7))

##S18
taps_2013$S18_PID3 = ifelse(taps_2013$PARTYID1S18=="Democrat",-1,ifelse(taps_2013$PARTYID1S18=="Republican",1,ifelse(taps_2013$PARTYID1S18=="Independent",0,NA)))
table(taps_2013$PARTYID1S18,taps_2013$S18_PID3)
taps_2013$S18_PID7 = ifelse(taps_2013$PARTYID1S18=="Democrat" & taps_2013$PARTYID2S18=="Strong",-3,ifelse(taps_2013$PARTYID1S18=="Democrat"& taps_2013$PARTYID2S18=="Not Very Strong",-2,ifelse(taps_2013$PARTYID1S18=="Republican"& taps_2013$PARTYID2S18=="Strong",3,ifelse(taps_2013$PARTYID1S18=="Republican"& taps_2013$PARTYID2S18=="Not Very Strong",2,ifelse((taps_2013$PARTYID1S18=="Independent" & taps_2013$PARTYID3S18=="Democratic Party")|(taps_2013$PARTYID1S18=="Other, please specify:" & taps_2013$PARTYID4S18=="Democrats")|(taps_2013$PARTYID1S18=="Refused" & taps_2013$PARTYID4S18=="Democrats"),-1,ifelse((taps_2013$PARTYID1S18=="Independent" & taps_2013$PARTYID3S18=="Republican Party")|(taps_2013$PARTYID1S18=="Other, please specify:" & taps_2013$PARTYID4S18=="Republicans")|(taps_2013$PARTYID1S18=="Refused" & taps_2013$PARTYID4S18=="Republicans"),1,ifelse(taps_2013$PARTYID1S18=="Independent" & taps_2013$PARTYID3S18=="Refused",0,NA)))))))
table(taps_2013$PARTYID1S18,taps_2013$S18_PID7)
sum(!is.na(taps_2013$S18_PID3))
sum(!is.na(taps_2013$S18_PID7))

##S20
taps_2013$S20_PID3 = ifelse(taps_2013$PARTYID1S20=="Democrat",-1,ifelse(taps_2013$PARTYID1S20=="Republican",1,ifelse(taps_2013$PARTYID1S20=="Independent",0,NA)))
table(taps_2013$PARTYID1S20,taps_2013$S20_PID3)
taps_2013$S20_PID7 = ifelse(taps_2013$PARTYID1S20=="Democrat" & taps_2013$PARTYID2S20=="Strong",-3,ifelse(taps_2013$PARTYID1S20=="Democrat"& taps_2013$PARTYID2S20=="Not Very Strong",-2,ifelse(taps_2013$PARTYID1S20=="Republican"& taps_2013$PARTYID2S20=="Strong",3,ifelse(taps_2013$PARTYID1S20=="Republican"& taps_2013$PARTYID2S20=="Not Very Strong",2,ifelse((taps_2013$PARTYID1S20=="Independent" & taps_2013$PARTYID3S20=="Democratic Party")|(taps_2013$PARTYID1S20=="Other, please specify:" & taps_2013$PARTYID4S20=="Democrats")|(taps_2013$PARTYID1S20=="Refused" & taps_2013$PARTYID4S20=="Democrats"),-1,ifelse((taps_2013$PARTYID1S20=="Independent" & taps_2013$PARTYID3S20=="Republican Party")|(taps_2013$PARTYID1S20=="Other, please specify:" & taps_2013$PARTYID4S20=="Republicans")|(taps_2013$PARTYID1S20=="Refused" & taps_2013$PARTYID4S20=="Republicans"),1,ifelse(taps_2013$PARTYID1S20=="Independent" & taps_2013$PARTYID3S20=="Refused",0,NA)))))))
table(taps_2013$PARTYID1S20,taps_2013$S20_PID7)
sum(!is.na(taps_2013$S20_PID3))
sum(!is.na(taps_2013$S20_PID7))

##S25
taps_2013$S25_PID3 = ifelse(taps_2013$PARTYID1S25=="Democrat",-1,ifelse(taps_2013$PARTYID1S25=="Republican",1,ifelse(taps_2013$PARTYID1S25=="Independent",0,NA)))
table(taps_2013$PARTYID1S25,taps_2013$S25_PID3)
taps_2013$S25_PID7 = ifelse(taps_2013$PARTYID1S25=="Democrat" & taps_2013$PARTYID2S25=="Strong",-3,ifelse(taps_2013$PARTYID1S25=="Democrat"& taps_2013$PARTYID2S25=="Not Very Strong",-2,ifelse(taps_2013$PARTYID1S25=="Republican"& taps_2013$PARTYID2S25=="Strong",3,ifelse(taps_2013$PARTYID1S25=="Republican"& taps_2013$PARTYID2S25=="Not Very Strong",2,ifelse((taps_2013$PARTYID1S25=="Independent" & taps_2013$PARTYID3S25=="Democratic Party")|(taps_2013$PARTYID1S25=="Other, please specify:" & taps_2013$PARTYID4S25=="Democrats")|(taps_2013$PARTYID1S25=="Refused" & taps_2013$PARTYID4S25=="Democrats"),-1,ifelse((taps_2013$PARTYID1S25=="Independent" & taps_2013$PARTYID3S25=="Republican Party")|(taps_2013$PARTYID1S25=="Other, please specify:" & taps_2013$PARTYID4S25=="Republicans")|(taps_2013$PARTYID1S25=="Refused" & taps_2013$PARTYID4S25=="Republicans"),1,ifelse(taps_2013$PARTYID1S25=="Independent" & taps_2013$PARTYID3S25=="Refused",0,NA)))))))
table(taps_2013$PARTYID1S25,taps_2013$S25_PID7)
sum(!is.na(taps_2013$S25_PID3))
sum(!is.na(taps_2013$S25_PID7))

##S28
taps_2014$S28_PID3 = ifelse(taps_2014$PARTYID1S28==2,-1,ifelse(taps_2014$PARTYID1S28==3,1,ifelse(taps_2014$PARTYID1S28==4,0,NA)))
table(taps_2014$PARTYID1S28,taps_2014$S28_PID3)
taps_2014$S28_PID7 = ifelse(taps_2014$PARTYID1S28==2 & taps_2014$PARTYID2S28==2,-3,ifelse(taps_2014$PARTYID1S28==2 & taps_2014$PARTYID2S28==3,-2,ifelse(taps_2014$PARTYID1S28==3 & taps_2014$PARTYID2S28==2,3,ifelse(taps_2014$PARTYID1S28==3 & taps_2014$PARTYID2S28==3,2,ifelse((taps_2014$PARTYID1S28==4 & taps_2014$PARTYID3S28==2)|(taps_2014$PARTYID1S28==5 & taps_2014$PARTYID4S28==2)|(taps_2014$PARTYID1S28==1 & taps_2014$PARTYID4S28==2),-1,ifelse((taps_2014$PARTYID1S28==4 & taps_2014$PARTYID3S28==3)|(taps_2014$PARTYID1S28==5 & taps_2014$PARTYID4S28==3)|(taps_2014$PARTYID1S28==1 & taps_2014$PARTYID4S28==3),1,ifelse(taps_2014$PARTYID1S28==4 & taps_2014$PARTYID3S28==1,0,NA)))))))
table(taps_2014$PARTYID1S28,taps_2014$S28_PID7)
sum(!is.na(taps_2014$S28_PID3))
sum(!is.na(taps_2014$S28_PID7))

##S31
taps_2014$S31_PID3 = ifelse(taps_2014$PARTYID1S31==2,-1,ifelse(taps_2014$PARTYID1S31==3,1,ifelse(taps_2014$PARTYID1S31==4,0,NA)))
table(taps_2014$PARTYID1S31,taps_2014$S31_PID3)
taps_2014$S31_PID7 = ifelse(taps_2014$PARTYID1S31==2 & taps_2014$PARTYID2S31==2,-3,ifelse(taps_2014$PARTYID1S31==2 & taps_2014$PARTYID2S31==3,-2,ifelse(taps_2014$PARTYID1S31==3 & taps_2014$PARTYID2S31==2,3,ifelse(taps_2014$PARTYID1S31==3 & taps_2014$PARTYID2S31==3,2,ifelse((taps_2014$PARTYID1S31==4 & taps_2014$PARTYID3S31==2)|(taps_2014$PARTYID1S31==5 & taps_2014$PARTYID4S31==2)|(taps_2014$PARTYID1S31==1 & taps_2014$PARTYID4S31==2),-1,ifelse((taps_2014$PARTYID1S31==4 & taps_2014$PARTYID3S31==3)|(taps_2014$PARTYID1S31==5 & taps_2014$PARTYID4S31==3)|(taps_2014$PARTYID1S31==1 & taps_2014$PARTYID4S31==3),1,ifelse(taps_2014$PARTYID1S31==4 & taps_2014$PARTYID3S31==1,0,NA)))))))
table(taps_2014$PARTYID1S31,taps_2014$S31_PID7)
sum(!is.na(taps_2014$S31_PID3))
sum(!is.na(taps_2014$S31_PID7))

##S34
taps_2014$S34_PID3 = ifelse(taps_2014$PARTYID1S34==2,-1,ifelse(taps_2014$PARTYID1S34==3,1,ifelse(taps_2014$PARTYID1S34==4,0,NA)))
table(taps_2014$PARTYID1S34,taps_2014$S34_PID3)
taps_2014$S34_PID7 = ifelse(taps_2014$PARTYID1S34==2 & taps_2014$PARTYID2S34==2,-3,ifelse(taps_2014$PARTYID1S34==2 & taps_2014$PARTYID2S34==3,-2,ifelse(taps_2014$PARTYID1S34==3 & taps_2014$PARTYID2S34==2,3,ifelse(taps_2014$PARTYID1S34==3 & taps_2014$PARTYID2S34==3,2,ifelse((taps_2014$PARTYID1S34==4 & taps_2014$PARTYID3S34==2)|(taps_2014$PARTYID1S34==5 & taps_2014$PARTYID4S34==2)|(taps_2014$PARTYID1S34==1 & taps_2014$PARTYID4S34==2),-1,ifelse((taps_2014$PARTYID1S34==4 & taps_2014$PARTYID3S34==3)|(taps_2014$PARTYID1S34==5 & taps_2014$PARTYID4S34==3)|(taps_2014$PARTYID1S34==1 & taps_2014$PARTYID4S34==3),1,ifelse(taps_2014$PARTYID1S34==4 & taps_2014$PARTYID3S34==1,0,NA)))))))
table(taps_2014$PARTYID1S34,taps_2014$S34_PID7)
sum(!is.na(taps_2014$S34_PID3))
sum(!is.na(taps_2014$S34_PID7))

##S36
taps_2014$S36_PID3 = ifelse(taps_2014$PARTYID1S36==2,-1,ifelse(taps_2014$PARTYID1S36==3,1,ifelse(taps_2014$PARTYID1S36==4,0,NA)))
table(taps_2014$PARTYID1S36,taps_2014$S36_PID3)
taps_2014$S36_PID7 = ifelse(taps_2014$PARTYID1S36==2 & taps_2014$PARTYID2S36==2,-3,ifelse(taps_2014$PARTYID1S36==2 & taps_2014$PARTYID2S36==3,-2,ifelse(taps_2014$PARTYID1S36==3 & taps_2014$PARTYID2S36==2,3,ifelse(taps_2014$PARTYID1S36==3 & taps_2014$PARTYID2S36==3,2,ifelse((taps_2014$PARTYID1S36==4 & taps_2014$PARTYID3S36==2)|(taps_2014$PARTYID1S36==5 & taps_2014$PARTYID4S36==2)|(taps_2014$PARTYID1S36==1 & taps_2014$PARTYID4S36==2),-1,ifelse((taps_2014$PARTYID1S36==4 & taps_2014$PARTYID3S36==3)|(taps_2014$PARTYID1S36==5 & taps_2014$PARTYID4S36==3)|(taps_2014$PARTYID1S36==1 & taps_2014$PARTYID4S36==3),1,ifelse(taps_2014$PARTYID1S36==4 & taps_2014$PARTYID3S36==1,0,NA)))))))
table(taps_2014$PARTYID1S36,taps_2014$S36_PID7)
sum(!is.na(taps_2014$S36_PID3))
sum(!is.na(taps_2014$S36_PID7))

##S38
taps_2015$S38_PID3 = ifelse(taps_2015$PARTYID1S38=="Democrat",-1,ifelse(taps_2015$PARTYID1S38=="Republican",1,ifelse(taps_2015$PARTYID1S38=="Independent",0,NA)))
table(taps_2015$PARTYID1S38,taps_2015$S38_PID3)
taps_2015$S38_PID7 = ifelse(taps_2015$PARTYID1S38=="Democrat" & taps_2015$PARTYID2S38=="Strong",-3,ifelse(taps_2015$PARTYID1S38=="Democrat"& taps_2015$PARTYID2S38=="Not Very Strong",-2,ifelse(taps_2015$PARTYID1S38=="Republican"& taps_2015$PARTYID2S38=="Strong",3,ifelse(taps_2015$PARTYID1S38=="Republican"& taps_2015$PARTYID2S38=="Not Very Strong",2,ifelse((taps_2015$PARTYID1S38=="Independent" & taps_2015$PARTYID3S38=="Democratic Party")|(taps_2015$PARTYID1S38=="Other, please specify:" & taps_2015$PARTYID4S38=="Democrats")|(taps_2015$PARTYID1S38=="Refused" & taps_2015$PARTYID4S38=="Democrats"),-1,ifelse((taps_2015$PARTYID1S38=="Independent" & taps_2015$PARTYID3S38=="Republican Party")|(taps_2015$PARTYID1S38=="Other, please specify:" & taps_2015$PARTYID4S38=="Republicans")|(taps_2015$PARTYID1S38=="Refused" & taps_2015$PARTYID4S38=="Republicans"),1,ifelse(taps_2015$PARTYID1S38=="Independent" & taps_2015$PARTYID3S38=="Refused",0,NA)))))))
table(taps_2015$PARTYID1S38,taps_2015$S38_PID7)
sum(!is.na(taps_2015$S38_PID3))
sum(!is.na(taps_2015$S38_PID7))

##S44
taps_2015$S44_PID3 = ifelse(taps_2015$PARTYID1S44=="Democrat",-1,ifelse(taps_2015$PARTYID1S44=="Republican",1,ifelse(taps_2015$PARTYID1S44=="Independent",0,NA)))
table(taps_2015$PARTYID1S44,taps_2015$S44_PID3)
taps_2015$S44_PID7 = ifelse(taps_2015$PARTYID1S44=="Democrat" & taps_2015$PARTYID2S44=="Strong",-3,ifelse(taps_2015$PARTYID1S44=="Democrat"& taps_2015$PARTYID2S44=="Not Very Strong",-2,ifelse(taps_2015$PARTYID1S44=="Republican"& taps_2015$PARTYID2S44=="Strong",3,ifelse(taps_2015$PARTYID1S44=="Republican"& taps_2015$PARTYID2S44=="Not Very Strong",2,ifelse((taps_2015$PARTYID1S44=="Independent" & taps_2015$PARTYID3S44=="Democratic Party")|(taps_2015$PARTYID1S44=="Other, please specify:" & taps_2015$PARTYID4S44=="Democrats")|(taps_2015$PARTYID1S44=="Refused" & taps_2015$PARTYID4S44=="Democrats"),-1,ifelse((taps_2015$PARTYID1S44=="Independent" & taps_2015$PARTYID3S44=="Republican Party")|(taps_2015$PARTYID1S44=="Other, please specify:" & taps_2015$PARTYID4S44=="Republicans")|(taps_2015$PARTYID1S44=="Refused" & taps_2015$PARTYID4S44=="Republicans"),1,ifelse(taps_2015$PARTYID1S44=="Independent" & taps_2015$PARTYID3S44=="Refused",0,NA)))))))
table(taps_2015$PARTYID1S44,taps_2015$S44_PID7)
sum(!is.na(taps_2015$S44_PID3))
sum(!is.na(taps_2015$S44_PID7))

##S46
taps_2015$S46_PID3 = ifelse(taps_2015$PARTYID1S46=="Democrat",-1,ifelse(taps_2015$PARTYID1S46=="Republican",1,ifelse(taps_2015$PARTYID1S46=="Independent",0,NA)))
table(taps_2015$PARTYID1S46,taps_2015$S46_PID3)
taps_2015$S46_PID7 = ifelse(taps_2015$PARTYID1S46=="Democrat" & taps_2015$PARTYID2S46=="Strong",-3,ifelse(taps_2015$PARTYID1S46=="Democrat"& taps_2015$PARTYID2S46=="Not Very Strong",-2,ifelse(taps_2015$PARTYID1S46=="Republican"& taps_2015$PARTYID2S46=="Strong",3,ifelse(taps_2015$PARTYID1S46=="Republican"& taps_2015$PARTYID2S46=="Not Very Strong",2,ifelse((taps_2015$PARTYID1S46=="Independent" & taps_2015$PARTYID3S46=="Democratic Party")|(taps_2015$PARTYID1S46=="Other, please specify:" & taps_2015$PARTYID4S46=="Democrats")|(taps_2015$PARTYID1S46=="Refused" & taps_2015$PARTYID4S46=="Democrats"),-1,ifelse((taps_2015$PARTYID1S46=="Independent" & taps_2015$PARTYID3S46=="Republican Party")|(taps_2015$PARTYID1S46=="Other, please specify:" & taps_2015$PARTYID4S46=="Republicans")|(taps_2015$PARTYID1S46=="Refused" & taps_2015$PARTYID4S46=="Republicans"),1,ifelse(taps_2015$PARTYID1S46=="Independent" & taps_2015$PARTYID3S46=="Refused",0,NA)))))))
table(taps_2015$PARTYID1S46,taps_2015$S46_PID7)
sum(!is.na(taps_2015$S46_PID3))
sum(!is.na(taps_2015$S46_PID7))

##S50
taps_2016$S50_PID3 = ifelse(taps_2016$PARTYID1S50==2,-1,ifelse(taps_2016$PARTYID1S50==3,1,ifelse(taps_2016$PARTYID1S50==4,0,NA)))
table(taps_2016$PARTYID1S50,taps_2016$S50_PID3)
taps_2016$S50_PID7 = ifelse(taps_2016$PARTYID1S50==2 & taps_2016$PARTYID2S50==2,-3,ifelse(taps_2016$PARTYID1S50==2 & taps_2016$PARTYID2S50==3,-2,ifelse(taps_2016$PARTYID1S50==3 & taps_2016$PARTYID2S50==2,3,ifelse(taps_2016$PARTYID1S50==3 & taps_2016$PARTYID2S50==3,2,ifelse((taps_2016$PARTYID1S50==4 & taps_2016$PARTYID3S50==2)|(taps_2016$PARTYID1S50==5 & taps_2016$PARTYID4S50==2)|(taps_2016$PARTYID1S50==1 & taps_2016$PARTYID4S50==2),-1,ifelse((taps_2016$PARTYID1S50==4 & taps_2016$PARTYID3S50==3)|(taps_2016$PARTYID1S50==5 & taps_2016$PARTYID4S50==3)|(taps_2016$PARTYID1S50==1 & taps_2016$PARTYID4S50==3),1,ifelse(taps_2016$PARTYID1S50==4 & taps_2016$PARTYID3S50==1,0,NA)))))))
table(taps_2016$PARTYID1S50,taps_2016$S50_PID7)
sum(!is.na(taps_2016$S50_PID3))
sum(!is.na(taps_2016$S50_PID7))

##S51
taps_2016$S51_PID3 = ifelse(taps_2016$PARTYID1S51==2,-1,ifelse(taps_2016$PARTYID1S51==3,1,ifelse(taps_2016$PARTYID1S51==4,0,NA)))
table(taps_2016$PARTYID1S51,taps_2016$S51_PID3)
taps_2016$S51_PID7 = ifelse(taps_2016$PARTYID1S51==2 & taps_2016$PARTYID2S51==2,-3,ifelse(taps_2016$PARTYID1S51==2 & taps_2016$PARTYID2S51==3,-2,ifelse(taps_2016$PARTYID1S51==3 & taps_2016$PARTYID2S51==2,3,ifelse(taps_2016$PARTYID1S51==3 & taps_2016$PARTYID2S51==3,2,ifelse((taps_2016$PARTYID1S51==4 & taps_2016$PARTYID3S51==2)|(taps_2016$PARTYID1S51==5 & taps_2016$PARTYID4S51==2)|(taps_2016$PARTYID1S51==1 & taps_2016$PARTYID4S51==2),-1,ifelse((taps_2016$PARTYID1S51==4 & taps_2016$PARTYID3S51==3)|(taps_2016$PARTYID1S51==5 & taps_2016$PARTYID4S51==3)|(taps_2016$PARTYID1S51==1 & taps_2016$PARTYID4S51==3),1,ifelse(taps_2016$PARTYID1S51==4 & taps_2016$PARTYID3S51==1,0,NA)))))))
table(taps_2016$PARTYID1S51,taps_2016$S51_PID7)
sum(!is.na(taps_2016$S51_PID3))
sum(!is.na(taps_2016$S51_PID7))

##S54
taps_2016$S54_PID3 = ifelse(taps_2016$PARTYID1S54==2,-1,ifelse(taps_2016$PARTYID1S54==3,1,ifelse(taps_2016$PARTYID1S54==4,0,NA)))
table(taps_2016$PARTYID1S54,taps_2016$S54_PID3)
taps_2016$S54_PID7 = ifelse(taps_2016$PARTYID1S54==2 & taps_2016$PARTYID2S54==2,-3,ifelse(taps_2016$PARTYID1S54==2 & taps_2016$PARTYID2S54==3,-2,ifelse(taps_2016$PARTYID1S54==3 & taps_2016$PARTYID2S54==2,3,ifelse(taps_2016$PARTYID1S54==3 & taps_2016$PARTYID2S54==3,2,ifelse((taps_2016$PARTYID1S54==4 & taps_2016$PARTYID3S54==2)|(taps_2016$PARTYID1S54==5 & taps_2016$PARTYID4S54==2)|(taps_2016$PARTYID1S54==1 & taps_2016$PARTYID4S54==2),-1,ifelse((taps_2016$PARTYID1S54==4 & taps_2016$PARTYID3S54==3)|(taps_2016$PARTYID1S54==5 & taps_2016$PARTYID4S54==3)|(taps_2016$PARTYID1S54==1 & taps_2016$PARTYID4S54==3),1,ifelse(taps_2016$PARTYID1S54==4 & taps_2016$PARTYID3S54==1,0,NA)))))))
table(taps_2016$PARTYID1S54,taps_2016$S54_PID7)
sum(!is.na(taps_2016$S54_PID3))
sum(!is.na(taps_2016$S54_PID7))

##S61
taps_2016$S61_PID3 = ifelse(taps_2016$PARTYID1S61==2,-1,ifelse(taps_2016$PARTYID1S61==3,1,ifelse(taps_2016$PARTYID1S61==4,0,NA)))
table(taps_2016$PARTYID1S61,taps_2016$S61_PID3)
taps_2016$S61_PID7 = ifelse(taps_2016$PARTYID1S61==2 & taps_2016$PARTYID2S61==2,-3,ifelse(taps_2016$PARTYID1S61==2 & taps_2016$PARTYID2S61==3,-2,ifelse(taps_2016$PARTYID1S61==3 & taps_2016$PARTYID2S61==2,3,ifelse(taps_2016$PARTYID1S61==3 & taps_2016$PARTYID2S61==3,2,ifelse((taps_2016$PARTYID1S61==4 & taps_2016$PARTYID3S61==2)|(taps_2016$PARTYID1S61==5 & taps_2016$PARTYID4S61==2)|(taps_2016$PARTYID1S61==1 & taps_2016$PARTYID4S61==2),-1,ifelse((taps_2016$PARTYID1S61==4 & taps_2016$PARTYID3S61==3)|(taps_2016$PARTYID1S61==5 & taps_2016$PARTYID4S61==3)|(taps_2016$PARTYID1S61==1 & taps_2016$PARTYID4S61==3),1,ifelse(taps_2016$PARTYID1S61==4 & taps_2016$PARTYID3S61==1,0,NA)))))))
table(taps_2016$PARTYID1S61,taps_2016$S61_PID7)
sum(!is.na(taps_2016$S61_PID3))
sum(!is.na(taps_2016$S61_PID7))

##S64
taps_2017$S64_PID3 = ifelse(taps_2017$PARTYID1S64==1,-1,ifelse(taps_2017$PARTYID1S64==2,1,ifelse(taps_2017$PARTYID1S64==3,0,NA)))
table(taps_2017$PARTYID1S64,taps_2017$S64_PID3)
taps_2017$S64_PID7 = ifelse(taps_2017$PARTYID1S64==1 & taps_2017$PARTYID2S64==1,-3,ifelse(taps_2017$PARTYID1S64==1 & taps_2017$PARTYID2S64==2,-2,ifelse(taps_2017$PARTYID1S64==2 & taps_2017$PARTYID2S64==1,3,ifelse(taps_2017$PARTYID1S64==2 & taps_2017$PARTYID2S64==2,2,ifelse((taps_2017$PARTYID1S64==3 & taps_2017$PARTYID3S64==1)|(taps_2017$PARTYID1S64==4 & taps_2017$PARTYID4S64==1)|(taps_2017$PARTYID1S64==-1 & taps_2017$PARTYID4S64==1),-1,ifelse((taps_2017$PARTYID1S64==3 & taps_2017$PARTYID3S64==2)|(taps_2017$PARTYID1S64==4 & taps_2017$PARTYID4S64==2)|(taps_2017$PARTYID1S64==-1 & taps_2017$PARTYID4S64==2),1,ifelse(taps_2017$PARTYID1S64==3 & taps_2017$PARTYID3S64==-1,0,NA)))))))
table(taps_2017$PARTYID1S64,taps_2017$S64_PID7)
sum(!is.na(taps_2017$S64_PID3))
sum(!is.na(taps_2017$S64_PID7))

##S66
taps_2017$S66_PID3 = ifelse(taps_2017$PARTYID1S66==1,-1,ifelse(taps_2017$PARTYID1S66==2,1,ifelse(taps_2017$PARTYID1S66==3,0,NA)))
table(taps_2017$PARTYID1S66,taps_2017$S66_PID3)
taps_2017$S66_PID7 = ifelse(taps_2017$PARTYID1S66==1 & taps_2017$PARTYID2S66==1,-3,ifelse(taps_2017$PARTYID1S66==1 & taps_2017$PARTYID2S66==2,-2,ifelse(taps_2017$PARTYID1S66==2 & taps_2017$PARTYID2S66==1,3,ifelse(taps_2017$PARTYID1S66==2 & taps_2017$PARTYID2S66==2,2,ifelse((taps_2017$PARTYID1S66==3 & taps_2017$PARTYID3S66==1)|(taps_2017$PARTYID1S66==4 & taps_2017$PARTYID4S66==1)|(taps_2017$PARTYID1S66==-1 & taps_2017$PARTYID4S66==1),-1,ifelse((taps_2017$PARTYID1S66==3 & taps_2017$PARTYID3S66==2)|(taps_2017$PARTYID1S66==4 & taps_2017$PARTYID4S66==2)|(taps_2017$PARTYID1S66==-1 & taps_2017$PARTYID4S66==2),1,ifelse(taps_2017$PARTYID1S66==3 & taps_2017$PARTYID3S66==-1,0,NA)))))))
table(taps_2017$PARTYID1S66,taps_2017$S66_PID7)
sum(!is.na(taps_2017$S66_PID3))
sum(!is.na(taps_2017$S66_PID7))

##S70
taps_2018$S70_PID3 = ifelse(taps_2018$PARTYID1S70==1,-1,ifelse(taps_2018$PARTYID1S70==2,1,ifelse(taps_2018$PARTYID1S70==3,0,NA)))
table(taps_2018$PARTYID1S70,taps_2018$S70_PID3)
taps_2018$S70_PID7 = ifelse(taps_2018$PARTYID1S70==1 & taps_2018$PARTYID2S70==1,-3,ifelse(taps_2018$PARTYID1S70==1 & taps_2018$PARTYID2S70==2,-2,ifelse(taps_2018$PARTYID1S70==2 & taps_2018$PARTYID2S70==1,3,ifelse(taps_2018$PARTYID1S70==2 & taps_2018$PARTYID2S70==2,2,ifelse((taps_2018$PARTYID1S70==3 & taps_2018$PARTYID3S70==1)|(taps_2018$PARTYID1S70==4 & taps_2018$PARTYID4S70==1)|(taps_2018$PARTYID1S70==-1 & taps_2018$PARTYID4S70==1),-1,ifelse((taps_2018$PARTYID1S70==3 & taps_2018$PARTYID3S70==2)|(taps_2018$PARTYID1S70==4 & taps_2018$PARTYID4S70==2)|(taps_2018$PARTYID1S70==-1 & taps_2018$PARTYID4S70==2),1,ifelse(taps_2018$PARTYID1S70==3 & taps_2018$PARTYID3S70==-1,0,NA)))))))
table(taps_2018$PARTYID1S70,taps_2018$S70_PID7)
sum(!is.na(taps_2018$S70_PID3))
sum(!is.na(taps_2018$S70_PID7))



####Computing number of surviving complete cases
##To compute the following, add in one survey wave at a time within a code line corresponding to a single data year file. For example, for survey year 2012, start with only c("WUSTLID","S1_PID3") before moving onto c("WUSTLID","S1_PID3","S5_PID3"), and so on. Once you finish a data year file, move onto the next data year file and repeat the iterative process. Merges function to combine the final totals from previous data year files with the running totals in a more recent data year file that you're currently iterating through.
#PID3
waves_PID3_2012 = taps_2012[,c("WUSTLID","S1_PID3","S5_PID3","S7_PID3","S10_PID3","S11_PID3","S12_PID3")]
waves_PID3_2012 = waves_PID3_2012[rowSums(is.na(waves_PID3_2012))==0,]
nrow(waves_PID3_2012)

waves_PID3_2013 = taps_2013[,c("WUSTLID","S17_PID3","S18_PID3","S20_PID3","S25_PID3")]
waves_PID3_2013 = waves_PID3_2013[rowSums(is.na(waves_PID3_2013))==0,]
nrow(merge(waves_PID3_2012,waves_PID3_2013, by="WUSTLID"))

waves_PID3_2012_2013 = merge(waves_PID3_2012,waves_PID3_2013, by="WUSTLID")

waves_PID3_2014 = taps_2014[,c("WUSTLID","S28_PID3","S31_PID3","S34_PID3","S36_PID3")]
waves_PID3_2014 = waves_PID3_2014[rowSums(is.na(waves_PID3_2014))==0,]
nrow(merge(waves_PID3_2012_2013,waves_PID3_2014, by="WUSTLID"))

waves_PID3_2012_2014 = merge(waves_PID3_2012_2013,waves_PID3_2014, by="WUSTLID")

waves_PID3_2015 = taps_2015[,c("WUSTLID","S38_PID3","S44_PID3","S46_PID3")]
waves_PID3_2015 = waves_PID3_2015[rowSums(is.na(waves_PID3_2015))==0,]
nrow(merge(waves_PID3_2012_2014,waves_PID3_2015, by="WUSTLID"))

waves_PID3_2012_2015 = merge(waves_PID3_2012_2014,waves_PID3_2015, by="WUSTLID")

waves_PID3_2016 = taps_2016[,c("WUSTLID","S50_PID3","S51_PID3","S54_PID3","S61_PID3")]
waves_PID3_2016 = waves_PID3_2016[rowSums(is.na(waves_PID3_2016))==0,]
nrow(merge(waves_PID3_2012_2015,waves_PID3_2016, by="WUSTLID"))

waves_PID3_2012_2016 = merge(waves_PID3_2012_2015,waves_PID3_2016, by="WUSTLID")

waves_PID3_2017 = taps_2017[,c("WUSTLID","S64_PID3","S66_PID3")]
waves_PID3_2017 = waves_PID3_2017[rowSums(is.na(waves_PID3_2017))==0,]
nrow(merge(waves_PID3_2012_2016,waves_PID3_2017, by="WUSTLID"))

waves_PID3_2012_2017 = merge(waves_PID3_2012_2016,waves_PID3_2017, by="WUSTLID")

waves_PID3_2018 = taps_2018[,c("WUSTLID","S70_PID3")]
waves_PID3_2018 = waves_PID3_2018[rowSums(is.na(waves_PID3_2018))==0,]
nrow(merge(waves_PID3_2012_2017,waves_PID3_2018, by="WUSTLID"))

complete_cases_PID3 = merge(waves_PID3_2012_2017,waves_PID3_2018, by="WUSTLID")

#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
nrow(waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,])

waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
nrow(merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID"))

waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")

waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
nrow(merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID"))

waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")

waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
nrow(merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID"))

waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")

waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
nrow(merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID"))

waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")

waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
nrow(merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID"))

waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")

waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
nrow(merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID"))

complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")

##Removing interim data sets
rm(list = ls()[grepl("waves_PID", ls())])


####Creating data set [SUBJECTS WITH ANY NUMBER OF MISSING VALUES]
##Assign a minimum number of missing values each row must have in order to remain in the data set. 0 is equivalent to all cases. 24 is the maximum value applicable.
missing_value_minimum = 0

##24 waves OUTER JOIN
waves2012_13 = merge(taps_2012,taps_2013, by="WUSTLID", all=TRUE)
waves2012_14 = merge(waves2012_13,taps_2014, by="WUSTLID", all=TRUE)
waves2012_15 = merge(waves2012_14,taps_2015, by="WUSTLID", all=TRUE)
waves2012_16 = merge(waves2012_15,taps_2016, by="WUSTLID", all=TRUE)
waves2012_17 = merge(waves2012_16,taps_2017, by="WUSTLID", all=TRUE)
waves_2012_18 = merge(waves2012_17,taps_2018, by="WUSTLID", all=TRUE)

##Removing interim data sets
rm(list = ls()[grepl("waves2012_1", ls())])

##Filtering by columns
all_waves_PID7 = waves_2012_18[, c("S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7","S17_PID7","S18_PID7","S20_PID7","S25_PID7","S28_PID7","S31_PID7","S34_PID7","S36_PID7","S38_PID7","S44_PID7","S46_PID7","S50_PID7","S51_PID7","S54_PID7","S61_PID7","S64_PID7","S66_PID7","S70_PID7")]
all_waves_PID3 = waves_2012_18[, c("S1_PID3","S5_PID3","S7_PID3","S10_PID3","S11_PID3","S12_PID3","S17_PID3","S18_PID3","S20_PID3","S25_PID3","S28_PID3","S31_PID3","S34_PID3","S36_PID3","S38_PID3","S44_PID3","S46_PID3","S50_PID3","S51_PID3","S54_PID3","S61_PID3","S64_PID3","S66_PID3","S70_PID3")]

##Filtering by rows
#Removing if too few NAs or too many NAs
all_waves_PID7 = all_waves_PID7[rowSums(is.na(all_waves_PID7))>=missing_value_minimum,]
all_waves_PID3 = all_waves_PID3[rowSums(is.na(all_waves_PID3))>=missing_value_minimum,]

summary(rowSums(is.na(all_waves_PID7))) #Checking that the correct type of cases remain
summary(rowSums(is.na(all_waves_PID3))) #Checking that the correct type of cases remain

##Computing statistics
#Correlations
cor_PID7 = round(cor(all_waves_PID7, use="pairwise.complete.obs"),3)
cor_PID3 = round(cor(all_waves_PID3, use="pairwise.complete.obs"),3)

lower.tri(cor_PID7, diag = FALSE)
upper.tri(cor_PID7, diag = FALSE)
lower_7<-cor_PID7
lower_7[lower.tri(cor_PID7, diag=TRUE)]=""
lower_7<-as.data.frame(lower_7)
lower_7
write.csv(lower_7, "./one_plus_missing_cases_PID7_correlations.csv")

lower.tri(cor_PID3, diag = FALSE)
upper.tri(cor_PID3, diag = FALSE)
lower_3<-cor_PID3
lower_3[lower.tri(cor_PID3, diag=TRUE)]=""
lower_3<-as.data.frame(lower_3)
lower_3
write.csv(lower_3, "./one_plus_missing_cases_PID3_correlations.csv")

#Counting the number of observations in each cell
library("psych")
cell_counts_PID7 = pairwiseCount(all_waves_PID7, diagonal=FALSE)
cell_counts_PID3 = pairwiseCount(all_waves_PID3, diagonal=FALSE)

write.csv(cell_counts_PID7, "./cell_counts_PID7_correlations.csv")
write.csv(cell_counts_PID3, "./cell_counts_PID3_correlations.csv")

#Means and SDs (Individuals)
all_waves_PID7$Mean_PID7 = apply(all_waves_PID7[,1:24], 1, mean, na.rm = TRUE)
all_waves_PID7$SD_PID7 = apply(all_waves_PID7[,1:24], 1, sd, na.rm = TRUE)

summary(all_waves_PID7$Mean_PID7)
summary(all_waves_PID7$SD_PID7)

all_waves_PID3$Mean_PID3 = apply(all_waves_PID3[,1:24], 1, mean, na.rm = TRUE)
all_waves_PID3$SD_PID3 = apply(all_waves_PID3[,1:24], 1, sd, na.rm = TRUE)

summary(all_waves_PID3$Mean_PID3)
summary(all_waves_PID3$SD_PID3)

#Means and SDs (Waves)
as.data.frame(round(apply(all_waves_PID7, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID7, 2, sd, na.rm=TRUE),2))

as.data.frame(round(apply(all_waves_PID3, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(all_waves_PID3, 2, sd, na.rm=TRUE),2))



####Creating data set [COMPLETE CASES ONLY]
#PID3
waves_PID3_2012 = taps_2012[,c("WUSTLID","S1_PID3","S5_PID3","S7_PID3","S10_PID3","S11_PID3","S12_PID3")]
waves_PID3_2012 = waves_PID3_2012[rowSums(is.na(waves_PID3_2012))==0,]
waves_PID3_2013 = taps_2013[,c("WUSTLID","S17_PID3","S18_PID3","S20_PID3","S25_PID3")]
waves_PID3_2013 = waves_PID3_2013[rowSums(is.na(waves_PID3_2013))==0,]
waves_PID3_2012_2013 = merge(waves_PID3_2012,waves_PID3_2013, by="WUSTLID")
waves_PID3_2014 = taps_2014[,c("WUSTLID","S28_PID3","S31_PID3","S34_PID3","S36_PID3")]
waves_PID3_2014 = waves_PID3_2014[rowSums(is.na(waves_PID3_2014))==0,]
waves_PID3_2012_2014 = merge(waves_PID3_2012_2013,waves_PID3_2014, by="WUSTLID")
waves_PID3_2015 = taps_2015[,c("WUSTLID","S38_PID3","S44_PID3","S46_PID3")]
waves_PID3_2015 = waves_PID3_2015[rowSums(is.na(waves_PID3_2015))==0,]
waves_PID3_2012_2015 = merge(waves_PID3_2012_2014,waves_PID3_2015, by="WUSTLID")
waves_PID3_2016 = taps_2016[,c("WUSTLID","S50_PID3","S51_PID3","S54_PID3","S61_PID3")]
waves_PID3_2016 = waves_PID3_2016[rowSums(is.na(waves_PID3_2016))==0,]
waves_PID3_2012_2016 = merge(waves_PID3_2012_2015,waves_PID3_2016, by="WUSTLID")
waves_PID3_2017 = taps_2017[,c("WUSTLID","S64_PID3","S66_PID3")]
waves_PID3_2017 = waves_PID3_2017[rowSums(is.na(waves_PID3_2017))==0,]
waves_PID3_2012_2017 = merge(waves_PID3_2012_2016,waves_PID3_2017, by="WUSTLID")
waves_PID3_2018 = taps_2018[,c("WUSTLID","S70_PID3")]
waves_PID3_2018 = waves_PID3_2018[rowSums(is.na(waves_PID3_2018))==0,]
complete_cases_PID3 = merge(waves_PID3_2012_2017,waves_PID3_2018, by="WUSTLID")
complete_cases_PID3 = complete_cases_PID3[,2:25]

#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

##Computing statistics
#Correlations
cor_PID7_listwise = round(cor(complete_cases_PID7),3)
cor_PID3_listwise = round(cor(complete_cases_PID3),3)

lower.tri(cor_PID7_listwise, diag = FALSE)
upper.tri(cor_PID7_listwise, diag = FALSE)
lower_7_listwise<-cor_PID7_listwise
lower_7_listwise[lower.tri(cor_PID7_listwise, diag=TRUE)]=""
lower_7_listwise<-as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./complete_cases_PID7_correlations.csv")

lower.tri(cor_PID3_listwise, diag = FALSE)
upper.tri(cor_PID3_listwise, diag = FALSE)
lower_3_listwise<-cor_PID3_listwise
lower_3_listwise[lower.tri(cor_PID3_listwise, diag=TRUE)]=""
lower_3_listwise<-as.data.frame(lower_3_listwise)
lower_3_listwise
write.csv(lower_3_listwise, "./complete_cases_PID3_correlations.csv")

library("psych")
pairwiseCount(complete_cases_PID7)
pairwiseCount(all_waves_PID3)

######
#To create the final correlation matrices for presentation, combine the pairwise correlations for the total cases (missing_value_minimum = 0) with the correlations for the complete case dataframes (which is equivalent to listwise deletion) to match the four matrices you have in the "Total Cases" tab and the "Complete Cases" tab
#PID7 Correlation Matrix (pairwise is lower diagonal, listwise is upper diagonal)
lower.tri(cor_PID7_listwise, diag = FALSE)
lower.tri(cor_PID7, diag = FALSE)
lower_7_listwise<-cor_PID7_listwise
lower_7<-cor_PID7

for (i in 1:nrow(lower_7_listwise)){
  for (j in 1:ncol(lower_7_listwise)){
    if (i>j){
      lower_7_listwise[i,j] = lower_7[i,j]
    }
    if (lower_7_listwise[i,j] == 1.0){
      lower_7_listwise[i,j] = "**"
    }
  }
}

lower_7_listwise<-as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./final_PID7_correlations.csv")

#PID3 Correlation Matrix (pairwise is lower diagonal, listwise is upper diagonal)
lower.tri(cor_PID3_listwise, diag = FALSE)
lower.tri(cor_PID3, diag = FALSE)
lower_3_listwise<-cor_PID3_listwise
lower_3<-cor_PID3

for (i in 1:nrow(lower_3_listwise)){
  for (j in 1:ncol(lower_3_listwise)){
    if (i>j){
      lower_3_listwise[i,j] = lower_3[i,j]
    }
    if (lower_3_listwise[i,j] == 1.0){
      lower_3_listwise[i,j] = "**"
    }
  }
}

lower_3_listwise<-as.data.frame(lower_3_listwise)
lower_3_listwise
write.csv(lower_3_listwise, "./final_PID3_correlations.csv")
######

#Means and SDs (Individuals)
complete_cases_PID7$Mean_PID7 = apply(complete_cases_PID7[,1:24], 1, mean)
complete_cases_PID7$SD_PID7 = apply(complete_cases_PID7[,1:24], 1, sd)

summary(complete_cases_PID7$Mean_PID7)
summary(complete_cases_PID7$SD_PID7)

complete_cases_PID3$Mean_PID3 = apply(complete_cases_PID3[,1:24], 1, mean)
complete_cases_PID3$SD_PID3 = apply(complete_cases_PID3[,1:24], 1, sd)

summary(complete_cases_PID3$Mean_PID3)
summary(complete_cases_PID3$SD_PID3)

#Means and SDs (Waves)
as.data.frame(round(apply(complete_cases_PID7, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(complete_cases_PID7, 2, sd, na.rm=TRUE),2))

as.data.frame(round(apply(complete_cases_PID3, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(complete_cases_PID3, 2, sd, na.rm=TRUE),2))



####Time Series and IV Regressions
###Creating data set [COMPLETE CASES ONLY]
#PID3
waves_PID3_2012 = taps_2012[,c("WUSTLID","S1_PID3","S5_PID3","S7_PID3","S10_PID3","S11_PID3","S12_PID3")]
waves_PID3_2012 = waves_PID3_2012[rowSums(is.na(waves_PID3_2012))==0,]
waves_PID3_2013 = taps_2013[,c("WUSTLID","S17_PID3","S18_PID3","S20_PID3","S25_PID3")]
waves_PID3_2013 = waves_PID3_2013[rowSums(is.na(waves_PID3_2013))==0,]
waves_PID3_2012_2013 = merge(waves_PID3_2012,waves_PID3_2013, by="WUSTLID")
waves_PID3_2014 = taps_2014[,c("WUSTLID","S28_PID3","S31_PID3","S34_PID3","S36_PID3")]
waves_PID3_2014 = waves_PID3_2014[rowSums(is.na(waves_PID3_2014))==0,]
waves_PID3_2012_2014 = merge(waves_PID3_2012_2013,waves_PID3_2014, by="WUSTLID")
waves_PID3_2015 = taps_2015[,c("WUSTLID","S38_PID3","S44_PID3","S46_PID3")]
waves_PID3_2015 = waves_PID3_2015[rowSums(is.na(waves_PID3_2015))==0,]
waves_PID3_2012_2015 = merge(waves_PID3_2012_2014,waves_PID3_2015, by="WUSTLID")
waves_PID3_2016 = taps_2016[,c("WUSTLID","S50_PID3","S51_PID3","S54_PID3","S61_PID3")]
waves_PID3_2016 = waves_PID3_2016[rowSums(is.na(waves_PID3_2016))==0,]
waves_PID3_2012_2016 = merge(waves_PID3_2012_2015,waves_PID3_2016, by="WUSTLID")
waves_PID3_2017 = taps_2017[,c("WUSTLID","S64_PID3","S66_PID3")]
waves_PID3_2017 = waves_PID3_2017[rowSums(is.na(waves_PID3_2017))==0,]
waves_PID3_2012_2017 = merge(waves_PID3_2012_2016,waves_PID3_2017, by="WUSTLID")
waves_PID3_2018 = taps_2018[,c("WUSTLID","S70_PID3")]
waves_PID3_2018 = waves_PID3_2018[rowSums(is.na(waves_PID3_2018))==0,]
complete_cases_PID3 = merge(waves_PID3_2012_2017,waves_PID3_2018, by="WUSTLID")
complete_cases_PID3 = complete_cases_PID3[,2:25]

#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

##Creating multiple dataframes
complete_cases_1_5_7_PID7 = complete_cases_PID7[, c('S1_PID7','S5_PID7','S7_PID7')]
complete_cases_5_7_10_PID7 = complete_cases_PID7[, c('S5_PID7','S7_PID7','S10_PID7')]
complete_cases_7_10_11_PID7 = complete_cases_PID7[, c('S7_PID7','S10_PID7','S11_PID7')]
complete_cases_10_11_12_PID7 = complete_cases_PID7[, c('S10_PID7','S11_PID7','S12_PID7')]
complete_cases_11_12_17_PID7 = complete_cases_PID7[, c('S11_PID7','S12_PID7','S17_PID7')]
complete_cases_12_17_18_PID7 = complete_cases_PID7[, c('S12_PID7','S17_PID7','S18_PID7')]
complete_cases_17_18_20_PID7 = complete_cases_PID7[, c('S17_PID7','S18_PID7','S20_PID7')]
complete_cases_18_20_25_PID7 = complete_cases_PID7[, c('S18_PID7','S20_PID7','S25_PID7')]
complete_cases_20_25_28_PID7 = complete_cases_PID7[, c('S20_PID7','S25_PID7','S28_PID7')]
complete_cases_25_28_31_PID7 = complete_cases_PID7[, c('S25_PID7','S28_PID7','S31_PID7')]
complete_cases_28_31_34_PID7 = complete_cases_PID7[, c('S28_PID7','S31_PID7','S34_PID7')]
complete_cases_31_34_36_PID7 = complete_cases_PID7[, c('S31_PID7','S34_PID7','S36_PID7')]
complete_cases_34_36_38_PID7 = complete_cases_PID7[, c('S34_PID7','S36_PID7','S38_PID7')]
complete_cases_36_38_44_PID7 = complete_cases_PID7[, c('S36_PID7','S38_PID7','S44_PID7')]
complete_cases_38_44_46_PID7 = complete_cases_PID7[, c('S38_PID7','S44_PID7','S46_PID7')]
complete_cases_44_46_50_PID7 = complete_cases_PID7[, c('S44_PID7','S46_PID7','S50_PID7')]
complete_cases_46_50_51_PID7 = complete_cases_PID7[, c('S46_PID7','S50_PID7','S51_PID7')]
complete_cases_50_51_54_PID7 = complete_cases_PID7[, c('S50_PID7','S51_PID7','S54_PID7')]
complete_cases_51_54_61_PID7 = complete_cases_PID7[, c('S51_PID7','S54_PID7','S61_PID7')]
complete_cases_54_61_64_PID7 = complete_cases_PID7[, c('S54_PID7','S61_PID7','S64_PID7')]
complete_cases_61_64_66_PID7 = complete_cases_PID7[, c('S61_PID7','S64_PID7','S66_PID7')]
complete_cases_64_66_70_PID7 = complete_cases_PID7[, c('S64_PID7','S66_PID7','S70_PID7')]

names(complete_cases_1_5_7_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_5_7_10_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_7_10_11_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_10_11_12_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_11_12_17_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_12_17_18_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_17_18_20_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_18_20_25_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_20_25_28_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_25_28_31_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_28_31_34_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_31_34_36_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_34_36_38_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_36_38_44_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_38_44_46_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_44_46_50_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_46_50_51_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_50_51_54_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_51_54_61_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_54_61_64_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_61_64_66_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_64_66_70_PID7)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')

complete_cases_1_5_7_PID3 = complete_cases_PID3[, c('S1_PID3','S5_PID3','S7_PID3')]
complete_cases_5_7_10_PID3 = complete_cases_PID3[, c('S5_PID3','S7_PID3','S10_PID3')]
complete_cases_7_10_11_PID3 = complete_cases_PID3[, c('S7_PID3','S10_PID3','S11_PID3')]
complete_cases_10_11_12_PID3 = complete_cases_PID3[, c('S10_PID3','S11_PID3','S12_PID3')]
complete_cases_11_12_17_PID3 = complete_cases_PID3[, c('S11_PID3','S12_PID3','S17_PID3')]
complete_cases_12_17_18_PID3 = complete_cases_PID3[, c('S12_PID3','S17_PID3','S18_PID3')]
complete_cases_17_18_20_PID3 = complete_cases_PID3[, c('S17_PID3','S18_PID3','S20_PID3')]
complete_cases_18_20_25_PID3 = complete_cases_PID3[, c('S18_PID3','S20_PID3','S25_PID3')]
complete_cases_20_25_28_PID3 = complete_cases_PID3[, c('S20_PID3','S25_PID3','S28_PID3')]
complete_cases_25_28_31_PID3 = complete_cases_PID3[, c('S25_PID3','S28_PID3','S31_PID3')]
complete_cases_28_31_34_PID3 = complete_cases_PID3[, c('S28_PID3','S31_PID3','S34_PID3')]
complete_cases_31_34_36_PID3 = complete_cases_PID3[, c('S31_PID3','S34_PID3','S36_PID3')]
complete_cases_34_36_38_PID3 = complete_cases_PID3[, c('S34_PID3','S36_PID3','S38_PID3')]
complete_cases_36_38_44_PID3 = complete_cases_PID3[, c('S36_PID3','S38_PID3','S44_PID3')]
complete_cases_38_44_46_PID3 = complete_cases_PID3[, c('S38_PID3','S44_PID3','S46_PID3')]
complete_cases_44_46_50_PID3 = complete_cases_PID3[, c('S44_PID3','S46_PID3','S50_PID3')]
complete_cases_46_50_51_PID3 = complete_cases_PID3[, c('S46_PID3','S50_PID3','S51_PID3')]
complete_cases_50_51_54_PID3 = complete_cases_PID3[, c('S50_PID3','S51_PID3','S54_PID3')]
complete_cases_51_54_61_PID3 = complete_cases_PID3[, c('S51_PID3','S54_PID3','S61_PID3')]
complete_cases_54_61_64_PID3 = complete_cases_PID3[, c('S54_PID3','S61_PID3','S64_PID3')]
complete_cases_61_64_66_PID3 = complete_cases_PID3[, c('S61_PID3','S64_PID3','S66_PID3')]
complete_cases_64_66_70_PID3 = complete_cases_PID3[, c('S64_PID3','S66_PID3','S70_PID3')]

names(complete_cases_1_5_7_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_5_7_10_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_7_10_11_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_10_11_12_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_11_12_17_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_12_17_18_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_17_18_20_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_18_20_25_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_20_25_28_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_25_28_31_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_28_31_34_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_31_34_36_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_34_36_38_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_36_38_44_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_38_44_46_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_44_46_50_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_46_50_51_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_50_51_54_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_51_54_61_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_54_61_64_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_61_64_66_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_64_66_70_PID3)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')

##Creating combined dataframes
complete_cases_rbind_PID7 = rbind(complete_cases_1_5_7_PID7,complete_cases_5_7_10_PID7,complete_cases_7_10_11_PID7,complete_cases_10_11_12_PID7,complete_cases_11_12_17_PID7,complete_cases_12_17_18_PID7,complete_cases_17_18_20_PID7,complete_cases_18_20_25_PID7,complete_cases_20_25_28_PID7,complete_cases_25_28_31_PID7,complete_cases_28_31_34_PID7,complete_cases_31_34_36_PID7,complete_cases_34_36_38_PID7,complete_cases_36_38_44_PID7,complete_cases_38_44_46_PID7,complete_cases_44_46_50_PID7,complete_cases_46_50_51_PID7,complete_cases_50_51_54_PID7,complete_cases_51_54_61_PID7,complete_cases_54_61_64_PID7,complete_cases_61_64_66_PID7,complete_cases_64_66_70_PID7)
complete_cases_rbind_PID3 = rbind(complete_cases_1_5_7_PID3,complete_cases_5_7_10_PID3,complete_cases_7_10_11_PID3,complete_cases_10_11_12_PID3,complete_cases_11_12_17_PID3,complete_cases_12_17_18_PID3,complete_cases_17_18_20_PID3,complete_cases_18_20_25_PID3,complete_cases_20_25_28_PID3,complete_cases_25_28_31_PID3,complete_cases_28_31_34_PID3,complete_cases_31_34_36_PID3,complete_cases_34_36_38_PID3,complete_cases_36_38_44_PID3,complete_cases_38_44_46_PID3,complete_cases_44_46_50_PID3,complete_cases_46_50_51_PID3,complete_cases_50_51_54_PID3,complete_cases_51_54_61_PID3,complete_cases_54_61_64_PID3,complete_cases_61_64_66_PID3,complete_cases_64_66_70_PID3)

##Adding a wave column
w1_PID7 = as.data.frame(rep('S1',nrow(complete_cases_1_5_7_PID7)))
names(w1_PID7)[1] <- 'wave'
w2_PID7 = as.data.frame(rep('S5',nrow(complete_cases_5_7_10_PID7)))
names(w2_PID7)[1] <- 'wave'
w3_PID7 = as.data.frame(rep('S7',nrow(complete_cases_7_10_11_PID7)))
names(w3_PID7)[1] <- 'wave'
w4_PID7 = as.data.frame(rep('S10',nrow(complete_cases_10_11_12_PID7)))
names(w4_PID7)[1] <- 'wave'
w5_PID7 = as.data.frame(rep('S11',nrow(complete_cases_11_12_17_PID7)))
names(w5_PID7)[1] <- 'wave'
w6_PID7 = as.data.frame(rep('S12',nrow(complete_cases_12_17_18_PID7)))
names(w6_PID7)[1] <- 'wave'
w7_PID7 = as.data.frame(rep('S17',nrow(complete_cases_17_18_20_PID7)))
names(w7_PID7)[1] <- 'wave'
w8_PID7 = as.data.frame(rep('S18',nrow(complete_cases_18_20_25_PID7)))
names(w8_PID7)[1] <- 'wave'
w9_PID7 = as.data.frame(rep('S20',nrow(complete_cases_20_25_28_PID7)))
names(w9_PID7)[1] <- 'wave'
w10_PID7 = as.data.frame(rep('S25',nrow(complete_cases_25_28_31_PID7)))
names(w10_PID7)[1] <- 'wave'
w11_PID7 = as.data.frame(rep('S28',nrow(complete_cases_28_31_34_PID7)))
names(w11_PID7)[1] <- 'wave'
w12_PID7 = as.data.frame(rep('S31',nrow(complete_cases_31_34_36_PID7)))
names(w12_PID7)[1] <- 'wave'
w13_PID7 = as.data.frame(rep('S34',nrow(complete_cases_34_36_38_PID7)))
names(w13_PID7)[1] <- 'wave'
w14_PID7 = as.data.frame(rep('S36',nrow(complete_cases_36_38_44_PID7)))
names(w14_PID7)[1] <- 'wave'
w15_PID7 = as.data.frame(rep('S38',nrow(complete_cases_38_44_46_PID7)))
names(w15_PID7)[1] <- 'wave'
w16_PID7 = as.data.frame(rep('S44',nrow(complete_cases_44_46_50_PID7)))
names(w16_PID7)[1] <- 'wave'
w17_PID7 = as.data.frame(rep('S46',nrow(complete_cases_46_50_51_PID7)))
names(w17_PID7)[1] <- 'wave'
w18_PID7 = as.data.frame(rep('S50',nrow(complete_cases_50_51_54_PID7)))
names(w18_PID7)[1] <- 'wave'
w19_PID7 = as.data.frame(rep('S51',nrow(complete_cases_51_54_61_PID7)))
names(w19_PID7)[1] <- 'wave'
w20_PID7 = as.data.frame(rep('S54',nrow(complete_cases_54_61_64_PID7)))
names(w20_PID7)[1] <- 'wave'
w21_PID7 = as.data.frame(rep('S61',nrow(complete_cases_61_64_66_PID7)))
names(w21_PID7)[1] <- 'wave'
w22_PID7 = as.data.frame(rep('S64',nrow(complete_cases_64_66_70_PID7)))
names(w22_PID7)[1] <- 'wave'

w1_PID3 = as.data.frame(rep('S1',nrow(complete_cases_1_5_7_PID3)))
names(w1_PID3)[1] <- 'wave'
w2_PID3 = as.data.frame(rep('S5',nrow(complete_cases_5_7_10_PID3)))
names(w2_PID3)[1] <- 'wave'
w3_PID3 = as.data.frame(rep('S7',nrow(complete_cases_7_10_11_PID3)))
names(w3_PID3)[1] <- 'wave'
w4_PID3 = as.data.frame(rep('S10',nrow(complete_cases_10_11_12_PID3)))
names(w4_PID3)[1] <- 'wave'
w5_PID3 = as.data.frame(rep('S11',nrow(complete_cases_11_12_17_PID3)))
names(w5_PID3)[1] <- 'wave'
w6_PID3 = as.data.frame(rep('S12',nrow(complete_cases_12_17_18_PID3)))
names(w6_PID3)[1] <- 'wave'
w7_PID3 = as.data.frame(rep('S17',nrow(complete_cases_17_18_20_PID3)))
names(w7_PID3)[1] <- 'wave'
w8_PID3 = as.data.frame(rep('S18',nrow(complete_cases_18_20_25_PID3)))
names(w8_PID3)[1] <- 'wave'
w9_PID3 = as.data.frame(rep('S20',nrow(complete_cases_20_25_28_PID3)))
names(w9_PID3)[1] <- 'wave'
w10_PID3 = as.data.frame(rep('S25',nrow(complete_cases_25_28_31_PID3)))
names(w10_PID3)[1] <- 'wave'
w11_PID3 = as.data.frame(rep('S28',nrow(complete_cases_28_31_34_PID3)))
names(w11_PID3)[1] <- 'wave'
w12_PID3 = as.data.frame(rep('S31',nrow(complete_cases_31_34_36_PID3)))
names(w12_PID3)[1] <- 'wave'
w13_PID3 = as.data.frame(rep('S34',nrow(complete_cases_34_36_38_PID3)))
names(w13_PID3)[1] <- 'wave'
w14_PID3 = as.data.frame(rep('S36',nrow(complete_cases_36_38_44_PID3)))
names(w14_PID3)[1] <- 'wave'
w15_PID3 = as.data.frame(rep('S38',nrow(complete_cases_38_44_46_PID3)))
names(w15_PID3)[1] <- 'wave'
w16_PID3 = as.data.frame(rep('S44',nrow(complete_cases_44_46_50_PID3)))
names(w16_PID3)[1] <- 'wave'
w17_PID3 = as.data.frame(rep('S46',nrow(complete_cases_46_50_51_PID3)))
names(w17_PID3)[1] <- 'wave'
w18_PID3 = as.data.frame(rep('S50',nrow(complete_cases_50_51_54_PID3)))
names(w18_PID3)[1] <- 'wave'
w19_PID3 = as.data.frame(rep('S51',nrow(complete_cases_51_54_61_PID3)))
names(w19_PID3)[1] <- 'wave'
w20_PID3 = as.data.frame(rep('S54',nrow(complete_cases_54_61_64_PID3)))
names(w20_PID3)[1] <- 'wave'
w21_PID3 = as.data.frame(rep('S61',nrow(complete_cases_61_64_66_PID3)))
names(w21_PID3)[1] <- 'wave'
w22_PID3 = as.data.frame(rep('S64',nrow(complete_cases_64_66_70_PID3)))
names(w22_PID3)[1] <- 'wave'

complete_cases_rbind_PID7_waves = rbind(w1_PID7,w2_PID7,w3_PID7,w4_PID7,w5_PID7,w6_PID7,w7_PID7,w8_PID7,w9_PID7,w10_PID7,w11_PID7,w12_PID7,w13_PID7,w14_PID7,w15_PID7,w16_PID7,w17_PID7,w18_PID7,w19_PID7,w20_PID7,w21_PID7,w22_PID7)
complete_cases_rbind_PID7 = cbind(complete_cases_rbind_PID7,complete_cases_rbind_PID7_waves)
summary(complete_cases_rbind_PID7)

complete_cases_rbind_PID3_waves = rbind(w1_PID3,w2_PID3,w3_PID3,w4_PID3,w5_PID3,w6_PID3,w7_PID3,w8_PID3,w9_PID3,w10_PID3,w11_PID3,w12_PID3,w13_PID3,w14_PID3,w15_PID3,w16_PID3,w17_PID3,w18_PID3,w19_PID3,w20_PID3,w21_PID3,w22_PID3)
complete_cases_rbind_PID3 = cbind(complete_cases_rbind_PID3,complete_cases_rbind_PID3_waves)
summary(complete_cases_rbind_PID3)

##Time Series Regressions
summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_rbind_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_rbind_PID7))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_1_5_7_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_1_5_7_PID7))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_17_18_20_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_17_18_20_PID7))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_38_44_46_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_38_44_46_PID7))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_64_66_70_PID7))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_64_66_70_PID7))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_rbind_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_rbind_PID3))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_1_5_7_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_1_5_7_PID3))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_17_18_20_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_17_18_20_PID3))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_38_44_46_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_38_44_46_PID3))

summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_64_66_70_PID3))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_64_66_70_PID3))

##IV Regressions
library('ivreg')
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_rbind_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_1_5_7_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_17_18_20_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_38_44_46_PID7))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_64_66_70_PID7))

summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_rbind_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_1_5_7_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_17_18_20_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_38_44_46_PID3))
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_64_66_70_PID3))



####Replicating Ansolabehere, Rodden, and Snyder (2008) by taking simple averages
###Creating data set [COMPLETE CASES ONLY]
#PID3
waves_PID3_2012 = taps_2012[,c("WUSTLID","S1_PID3","S5_PID3","S7_PID3","S10_PID3","S11_PID3","S12_PID3")]
waves_PID3_2012 = waves_PID3_2012[rowSums(is.na(waves_PID3_2012))==0,]
waves_PID3_2013 = taps_2013[,c("WUSTLID","S17_PID3","S18_PID3","S20_PID3","S25_PID3")]
waves_PID3_2013 = waves_PID3_2013[rowSums(is.na(waves_PID3_2013))==0,]
waves_PID3_2012_2013 = merge(waves_PID3_2012,waves_PID3_2013, by="WUSTLID")
waves_PID3_2014 = taps_2014[,c("WUSTLID","S28_PID3","S31_PID3","S34_PID3","S36_PID3")]
waves_PID3_2014 = waves_PID3_2014[rowSums(is.na(waves_PID3_2014))==0,]
waves_PID3_2012_2014 = merge(waves_PID3_2012_2013,waves_PID3_2014, by="WUSTLID")
waves_PID3_2015 = taps_2015[,c("WUSTLID","S38_PID3","S44_PID3","S46_PID3")]
waves_PID3_2015 = waves_PID3_2015[rowSums(is.na(waves_PID3_2015))==0,]
waves_PID3_2012_2015 = merge(waves_PID3_2012_2014,waves_PID3_2015, by="WUSTLID")
waves_PID3_2016 = taps_2016[,c("WUSTLID","S50_PID3","S51_PID3","S54_PID3","S61_PID3")]
waves_PID3_2016 = waves_PID3_2016[rowSums(is.na(waves_PID3_2016))==0,]
waves_PID3_2012_2016 = merge(waves_PID3_2012_2015,waves_PID3_2016, by="WUSTLID")
waves_PID3_2017 = taps_2017[,c("WUSTLID","S64_PID3","S66_PID3")]
waves_PID3_2017 = waves_PID3_2017[rowSums(is.na(waves_PID3_2017))==0,]
waves_PID3_2012_2017 = merge(waves_PID3_2012_2016,waves_PID3_2017, by="WUSTLID")
waves_PID3_2018 = taps_2018[,c("WUSTLID","S70_PID3")]
waves_PID3_2018 = waves_PID3_2018[rowSums(is.na(waves_PID3_2018))==0,]
complete_cases_PID3 = merge(waves_PID3_2012_2017,waves_PID3_2018, by="WUSTLID")
complete_cases_PID3 = complete_cases_PID3[,2:25]

#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

##Creating dataframes
complete_cases_PID7$pid7_S1_S18 = apply(complete_cases_PID7[,1:8], 1, mean, na.rm = TRUE)
complete_cases_PID7$pid7_S20_S44 = apply(complete_cases_PID7[,9:16], 1, mean, na.rm = TRUE)
complete_cases_PID7$pid7_S46_S70 = apply(complete_cases_PID7[,17:24], 1, mean, na.rm = TRUE)
PID7_scales = complete_cases_PID7[,25:27]

complete_cases_PID3$pid3_S1_S18 = apply(complete_cases_PID3[,1:8], 1, mean, na.rm = TRUE)
complete_cases_PID3$pid3_S20_S44 = apply(complete_cases_PID3[,9:16], 1, mean, na.rm = TRUE)
complete_cases_PID3$pid3_S46_S70 = apply(complete_cases_PID3[,17:24], 1, mean, na.rm = TRUE)
PID3_scales = complete_cases_PID3[,25:27]

##Computing statistics
#Correlations
cor_PID7 = round(cor(PID7_scales, use="pairwise.complete.obs"),3)
cor_PID3 = round(cor(PID3_scales, use="pairwise.complete.obs"),3)

lower.tri(cor_PID7, diag = FALSE)
upper.tri(cor_PID7, diag = FALSE)
lower_7<-cor_PID7
lower_7[lower.tri(cor_PID7, diag=TRUE)]=""
lower_7<-as.data.frame(lower_7)
lower_7
write.csv(lower_7, "./complete_cases_PID7_scales_correlations.csv")

lower.tri(cor_PID3, diag = FALSE)
upper.tri(cor_PID3, diag = FALSE)
lower_3<-cor_PID3
lower_3[lower.tri(cor_PID3, diag=TRUE)]=""
lower_3<-as.data.frame(lower_3)
lower_3
write.csv(lower_3, "./complete_cases_PID3_scales_correlations.csv")

#Time Series Regressions
summary(lm(pid7_S46_S70 ~ pid7_S20_S44, data=PID7_scales))
summary(lm(pid7_S46_S70 ~ pid7_S20_S44 + pid7_S1_S18, data=PID7_scales))

summary(lm(pid3_S46_S70 ~ pid3_S20_S44, data=PID3_scales))
summary(lm(pid3_S46_S70 ~ pid3_S20_S44 + pid3_S1_S18, data=PID3_scales))

#IV Regressions
library('ivreg')
summary(ivreg(pid7_S46_S70 ~ pid7_S20_S44, ~ pid7_S1_S18, data=PID7_scales))

summary(ivreg(pid3_S46_S70 ~ pid3_S20_S44, ~ pid3_S1_S18, data=PID3_scales))



####Alternative PID3 (leaners classified as partisans rather than Independents)
###Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

taps_alt_PID3 = complete_cases_PID7

taps_alt_PID3$S1_PID3_alt = ifelse(taps_alt_PID3$S1_PID7<=-1,-1,ifelse(taps_alt_PID3$S1_PID7>=1,1,0))
table(taps_alt_PID3$S1_PID3_alt)
table(taps_alt_PID3$S1_PID7)
table(taps_alt_PID3$S1_PID3_alt,taps_alt_PID3$S1_PID7)

taps_alt_PID3$S5_PID3_alt = ifelse(taps_alt_PID3$S5_PID7<=-1,-1,ifelse(taps_alt_PID3$S5_PID7>=1,1,0))
table(taps_alt_PID3$S5_PID3_alt)
table(taps_alt_PID3$S5_PID7)
table(taps_alt_PID3$S5_PID3_alt,taps_alt_PID3$S5_PID7)

taps_alt_PID3$S7_PID3_alt = ifelse(taps_alt_PID3$S7_PID7<=-1,-1,ifelse(taps_alt_PID3$S7_PID7>=1,1,0))
table(taps_alt_PID3$S7_PID3_alt)
table(taps_alt_PID3$S7_PID7)
table(taps_alt_PID3$S7_PID3_alt,taps_alt_PID3$S7_PID7)

taps_alt_PID3$S10_PID3_alt = ifelse(taps_alt_PID3$S10_PID7<=-1,-1,ifelse(taps_alt_PID3$S10_PID7>=1,1,0))
table(taps_alt_PID3$S10_PID3_alt)
table(taps_alt_PID3$S10_PID7)
table(taps_alt_PID3$S10_PID3_alt,taps_alt_PID3$S10_PID7)

taps_alt_PID3$S11_PID3_alt = ifelse(taps_alt_PID3$S11_PID7<=-1,-1,ifelse(taps_alt_PID3$S11_PID7>=1,1,0))
table(taps_alt_PID3$S11_PID3_alt)
table(taps_alt_PID3$S11_PID7)
table(taps_alt_PID3$S11_PID3_alt,taps_alt_PID3$S11_PID7)

taps_alt_PID3$S12_PID3_alt = ifelse(taps_alt_PID3$S12_PID7<=-1,-1,ifelse(taps_alt_PID3$S12_PID7>=1,1,0))
table(taps_alt_PID3$S12_PID3_alt)
table(taps_alt_PID3$S12_PID7)
table(taps_alt_PID3$S12_PID3_alt,taps_alt_PID3$S12_PID7)

taps_alt_PID3$S17_PID3_alt = ifelse(taps_alt_PID3$S17_PID7<=-1,-1,ifelse(taps_alt_PID3$S17_PID7>=1,1,0))
table(taps_alt_PID3$S17_PID3_alt)
table(taps_alt_PID3$S17_PID7)
table(taps_alt_PID3$S17_PID3_alt,taps_alt_PID3$S17_PID7)

taps_alt_PID3$S18_PID3_alt = ifelse(taps_alt_PID3$S18_PID7<=-1,-1,ifelse(taps_alt_PID3$S18_PID7>=1,1,0))
table(taps_alt_PID3$S18_PID3_alt)
table(taps_alt_PID3$S18_PID7)
table(taps_alt_PID3$S18_PID3_alt,taps_alt_PID3$S18_PID7)

taps_alt_PID3$S20_PID3_alt = ifelse(taps_alt_PID3$S20_PID7<=-1,-1,ifelse(taps_alt_PID3$S20_PID7>=1,1,0))
table(taps_alt_PID3$S20_PID3_alt)
table(taps_alt_PID3$S20_PID7)
table(taps_alt_PID3$S20_PID3_alt,taps_alt_PID3$S20_PID7)

taps_alt_PID3$S25_PID3_alt = ifelse(taps_alt_PID3$S25_PID7<=-1,-1,ifelse(taps_alt_PID3$S25_PID7>=1,1,0))
table(taps_alt_PID3$S25_PID3_alt)
table(taps_alt_PID3$S25_PID7)
table(taps_alt_PID3$S25_PID3_alt,taps_alt_PID3$S25_PID7)

taps_alt_PID3$S28_PID3_alt = ifelse(taps_alt_PID3$S28_PID7<=-1,-1,ifelse(taps_alt_PID3$S28_PID7>=1,1,0))
table(taps_alt_PID3$S28_PID3_alt)
table(taps_alt_PID3$S28_PID7)
table(taps_alt_PID3$S28_PID3_alt,taps_alt_PID3$S28_PID7)

taps_alt_PID3$S31_PID3_alt = ifelse(taps_alt_PID3$S31_PID7<=-1,-1,ifelse(taps_alt_PID3$S31_PID7>=1,1,0))
table(taps_alt_PID3$S31_PID3_alt)
table(taps_alt_PID3$S31_PID7)
table(taps_alt_PID3$S31_PID3_alt,taps_alt_PID3$S31_PID7)

taps_alt_PID3$S34_PID3_alt = ifelse(taps_alt_PID3$S34_PID7<=-1,-1,ifelse(taps_alt_PID3$S34_PID7>=1,1,0))
table(taps_alt_PID3$S34_PID3_alt)
table(taps_alt_PID3$S34_PID7)
table(taps_alt_PID3$S34_PID3_alt,taps_alt_PID3$S34_PID7)

taps_alt_PID3$S36_PID3_alt = ifelse(taps_alt_PID3$S36_PID7<=-1,-1,ifelse(taps_alt_PID3$S36_PID7>=1,1,0))
table(taps_alt_PID3$S36_PID3_alt)
table(taps_alt_PID3$S36_PID7)
table(taps_alt_PID3$S36_PID3_alt,taps_alt_PID3$S36_PID7)

taps_alt_PID3$S38_PID3_alt = ifelse(taps_alt_PID3$S38_PID7<=-1,-1,ifelse(taps_alt_PID3$S38_PID7>=1,1,0))
table(taps_alt_PID3$S38_PID3_alt)
table(taps_alt_PID3$S38_PID7)
table(taps_alt_PID3$S38_PID3_alt,taps_alt_PID3$S38_PID7)

taps_alt_PID3$S44_PID3_alt = ifelse(taps_alt_PID3$S44_PID7<=-1,-1,ifelse(taps_alt_PID3$S44_PID7>=1,1,0))
table(taps_alt_PID3$S44_PID3_alt)
table(taps_alt_PID3$S44_PID7)
table(taps_alt_PID3$S44_PID3_alt,taps_alt_PID3$S44_PID7)

taps_alt_PID3$S46_PID3_alt = ifelse(taps_alt_PID3$S46_PID7<=-1,-1,ifelse(taps_alt_PID3$S46_PID7>=1,1,0))
table(taps_alt_PID3$S46_PID3_alt)
table(taps_alt_PID3$S46_PID7)
table(taps_alt_PID3$S46_PID3_alt,taps_alt_PID3$S46_PID7)

taps_alt_PID3$S50_PID3_alt = ifelse(taps_alt_PID3$S50_PID7<=-1,-1,ifelse(taps_alt_PID3$S50_PID7>=1,1,0))
table(taps_alt_PID3$S50_PID3_alt)
table(taps_alt_PID3$S50_PID7)
table(taps_alt_PID3$S50_PID3_alt,taps_alt_PID3$S50_PID7)

taps_alt_PID3$S51_PID3_alt = ifelse(taps_alt_PID3$S51_PID7<=-1,-1,ifelse(taps_alt_PID3$S51_PID7>=1,1,0))
table(taps_alt_PID3$S51_PID3_alt)
table(taps_alt_PID3$S51_PID7)
table(taps_alt_PID3$S51_PID3_alt,taps_alt_PID3$S51_PID7)

taps_alt_PID3$S54_PID3_alt = ifelse(taps_alt_PID3$S54_PID7<=-1,-1,ifelse(taps_alt_PID3$S54_PID7>=1,1,0))
table(taps_alt_PID3$S54_PID3_alt)
table(taps_alt_PID3$S54_PID7)
table(taps_alt_PID3$S54_PID3_alt,taps_alt_PID3$S54_PID7)

taps_alt_PID3$S61_PID3_alt = ifelse(taps_alt_PID3$S61_PID7<=-1,-1,ifelse(taps_alt_PID3$S61_PID7>=1,1,0))
table(taps_alt_PID3$S61_PID3_alt)
table(taps_alt_PID3$S61_PID7)
table(taps_alt_PID3$S61_PID3_alt,taps_alt_PID3$S61_PID7)

taps_alt_PID3$S64_PID3_alt = ifelse(taps_alt_PID3$S64_PID7<=-1,-1,ifelse(taps_alt_PID3$S64_PID7>=1,1,0))
table(taps_alt_PID3$S64_PID3_alt)
table(taps_alt_PID3$S64_PID7)
table(taps_alt_PID3$S64_PID3_alt,taps_alt_PID3$S64_PID7)

taps_alt_PID3$S66_PID3_alt = ifelse(taps_alt_PID3$S66_PID7<=-1,-1,ifelse(taps_alt_PID3$S66_PID7>=1,1,0))
table(taps_alt_PID3$S66_PID3_alt)
table(taps_alt_PID3$S66_PID7)
table(taps_alt_PID3$S66_PID3_alt,taps_alt_PID3$S66_PID7)

taps_alt_PID3$S70_PID3_alt = ifelse(taps_alt_PID3$S70_PID7<=-1,-1,ifelse(taps_alt_PID3$S70_PID7>=1,1,0))
table(taps_alt_PID3$S70_PID3_alt)
table(taps_alt_PID3$S70_PID7)
table(taps_alt_PID3$S70_PID3_alt,taps_alt_PID3$S70_PID7)

taps_alt_PID3 = taps_alt_PID3[,25:48]

##Computing statistics
#Correlations
cor_PID3 = round(cor(taps_alt_PID3),3)

lower.tri(cor_PID3, diag = FALSE)
upper.tri(cor_PID3, diag = FALSE)
lower_3<-cor_PID3
lower_3[lower.tri(cor_PID3, diag=TRUE)]=""
lower_3<-as.data.frame(lower_3)
lower_3
write.csv(lower_3, "./complete_cases_PID3_alternative_correlations.csv")

#Means and SDs (Individuals)
taps_alt_PID3$Mean_PID3_alt = apply(taps_alt_PID3[,1:24], 1, mean, na.rm = TRUE)
taps_alt_PID3$SD_PID3_alt = apply(taps_alt_PID3[,1:24], 1, sd, na.rm = TRUE)

summary(taps_alt_PID3$Mean_PID3_alt)
summary(taps_alt_PID3$SD_PID3_alt)

#Means and SDs (Waves)
as.data.frame(round(apply(taps_alt_PID3, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(taps_alt_PID3, 2, sd, na.rm=TRUE),2))

##Time Series and IV Regressions
#Creating multiple dataframes
complete_cases_1_5_7_PID3_alt = taps_alt_PID3[, c('S1_PID3_alt','S5_PID3_alt','S7_PID3_alt')]
complete_cases_5_7_10_PID3_alt = taps_alt_PID3[, c('S5_PID3_alt','S7_PID3_alt','S10_PID3_alt')]
complete_cases_7_10_11_PID3_alt = taps_alt_PID3[, c('S7_PID3_alt','S10_PID3_alt','S11_PID3_alt')]
complete_cases_10_11_12_PID3_alt = taps_alt_PID3[, c('S10_PID3_alt','S11_PID3_alt','S12_PID3_alt')]
complete_cases_11_12_17_PID3_alt = taps_alt_PID3[, c('S11_PID3_alt','S12_PID3_alt','S17_PID3_alt')]
complete_cases_12_17_18_PID3_alt = taps_alt_PID3[, c('S12_PID3_alt','S17_PID3_alt','S18_PID3_alt')]
complete_cases_17_18_20_PID3_alt = taps_alt_PID3[, c('S17_PID3_alt','S18_PID3_alt','S20_PID3_alt')]
complete_cases_18_20_25_PID3_alt = taps_alt_PID3[, c('S18_PID3_alt','S20_PID3_alt','S25_PID3_alt')]
complete_cases_20_25_28_PID3_alt = taps_alt_PID3[, c('S20_PID3_alt','S25_PID3_alt','S28_PID3_alt')]
complete_cases_25_28_31_PID3_alt = taps_alt_PID3[, c('S25_PID3_alt','S28_PID3_alt','S31_PID3_alt')]
complete_cases_28_31_34_PID3_alt = taps_alt_PID3[, c('S28_PID3_alt','S31_PID3_alt','S34_PID3_alt')]
complete_cases_31_34_36_PID3_alt = taps_alt_PID3[, c('S31_PID3_alt','S34_PID3_alt','S36_PID3_alt')]
complete_cases_34_36_38_PID3_alt = taps_alt_PID3[, c('S34_PID3_alt','S36_PID3_alt','S38_PID3_alt')]
complete_cases_36_38_44_PID3_alt = taps_alt_PID3[, c('S36_PID3_alt','S38_PID3_alt','S44_PID3_alt')]
complete_cases_38_44_46_PID3_alt = taps_alt_PID3[, c('S38_PID3_alt','S44_PID3_alt','S46_PID3_alt')]
complete_cases_44_46_50_PID3_alt = taps_alt_PID3[, c('S44_PID3_alt','S46_PID3_alt','S50_PID3_alt')]
complete_cases_46_50_51_PID3_alt = taps_alt_PID3[, c('S46_PID3_alt','S50_PID3_alt','S51_PID3_alt')]
complete_cases_50_51_54_PID3_alt = taps_alt_PID3[, c('S50_PID3_alt','S51_PID3_alt','S54_PID3_alt')]
complete_cases_51_54_61_PID3_alt = taps_alt_PID3[, c('S51_PID3_alt','S54_PID3_alt','S61_PID3_alt')]
complete_cases_54_61_64_PID3_alt = taps_alt_PID3[, c('S54_PID3_alt','S61_PID3_alt','S64_PID3_alt')]
complete_cases_61_64_66_PID3_alt = taps_alt_PID3[, c('S61_PID3_alt','S64_PID3_alt','S66_PID3_alt')]
complete_cases_64_66_70_PID3_alt = taps_alt_PID3[, c('S64_PID3_alt','S66_PID3_alt','S70_PID3_alt')]

names(complete_cases_1_5_7_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_5_7_10_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_7_10_11_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_10_11_12_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_11_12_17_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_12_17_18_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_17_18_20_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_18_20_25_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_20_25_28_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_25_28_31_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_28_31_34_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_31_34_36_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_34_36_38_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_36_38_44_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_38_44_46_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_44_46_50_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_46_50_51_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_50_51_54_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_51_54_61_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_54_61_64_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_61_64_66_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')
names(complete_cases_64_66_70_PID3_alt)[1:3] <- c('PID_t_0','PID_t_1','PID_t_2')

##Creating combined dataframes
complete_cases_rbind_PID3_alt = rbind(complete_cases_1_5_7_PID3_alt,complete_cases_5_7_10_PID3_alt,complete_cases_7_10_11_PID3_alt,complete_cases_10_11_12_PID3_alt,complete_cases_11_12_17_PID3_alt,complete_cases_12_17_18_PID3_alt,complete_cases_17_18_20_PID3_alt,complete_cases_18_20_25_PID3_alt,complete_cases_20_25_28_PID3_alt,complete_cases_25_28_31_PID3_alt,complete_cases_28_31_34_PID3_alt,complete_cases_31_34_36_PID3_alt,complete_cases_34_36_38_PID3_alt,complete_cases_36_38_44_PID3_alt,complete_cases_38_44_46_PID3_alt,complete_cases_44_46_50_PID3_alt,complete_cases_46_50_51_PID3_alt,complete_cases_50_51_54_PID3_alt,complete_cases_51_54_61_PID3_alt,complete_cases_54_61_64_PID3_alt,complete_cases_61_64_66_PID3_alt,complete_cases_64_66_70_PID3_alt)

##Time Series Regressions
summary(lm(PID_t_2 ~ PID_t_1, data=complete_cases_rbind_PID3_alt))
summary(lm(PID_t_2 ~ PID_t_1 + PID_t_0, data=complete_cases_rbind_PID3_alt))

##IV Regressions
library('ivreg')
summary(ivreg(PID_t_2 ~ PID_t_1, ~ PID_t_0, data=complete_cases_rbind_PID3_alt))



####Estimating Drift Respondent-Wise [COMPLETE CASES ONLY]
###Creating data set [COMPLETE CASES ONLY]
#PID3
waves_PID3_2012 = taps_2012[,c("WUSTLID","S1_PID3","S5_PID3","S7_PID3","S10_PID3","S11_PID3","S12_PID3")]
waves_PID3_2012 = waves_PID3_2012[rowSums(is.na(waves_PID3_2012))==0,]
waves_PID3_2013 = taps_2013[,c("WUSTLID","S17_PID3","S18_PID3","S20_PID3","S25_PID3")]
waves_PID3_2013 = waves_PID3_2013[rowSums(is.na(waves_PID3_2013))==0,]
waves_PID3_2012_2013 = merge(waves_PID3_2012,waves_PID3_2013, by="WUSTLID")
waves_PID3_2014 = taps_2014[,c("WUSTLID","S28_PID3","S31_PID3","S34_PID3","S36_PID3")]
waves_PID3_2014 = waves_PID3_2014[rowSums(is.na(waves_PID3_2014))==0,]
waves_PID3_2012_2014 = merge(waves_PID3_2012_2013,waves_PID3_2014, by="WUSTLID")
waves_PID3_2015 = taps_2015[,c("WUSTLID","S38_PID3","S44_PID3","S46_PID3")]
waves_PID3_2015 = waves_PID3_2015[rowSums(is.na(waves_PID3_2015))==0,]
waves_PID3_2012_2015 = merge(waves_PID3_2012_2014,waves_PID3_2015, by="WUSTLID")
waves_PID3_2016 = taps_2016[,c("WUSTLID","S50_PID3","S51_PID3","S54_PID3","S61_PID3")]
waves_PID3_2016 = waves_PID3_2016[rowSums(is.na(waves_PID3_2016))==0,]
waves_PID3_2012_2016 = merge(waves_PID3_2012_2015,waves_PID3_2016, by="WUSTLID")
waves_PID3_2017 = taps_2017[,c("WUSTLID","S64_PID3","S66_PID3")]
waves_PID3_2017 = waves_PID3_2017[rowSums(is.na(waves_PID3_2017))==0,]
waves_PID3_2012_2017 = merge(waves_PID3_2012_2016,waves_PID3_2017, by="WUSTLID")
waves_PID3_2018 = taps_2018[,c("WUSTLID","S70_PID3")]
waves_PID3_2018 = waves_PID3_2018[rowSums(is.na(waves_PID3_2018))==0,]
complete_cases_PID3 = merge(waves_PID3_2012_2017,waves_PID3_2018, by="WUSTLID")
complete_cases_PID3 = complete_cases_PID3[,2:25]

#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

##Determining how many respondents have identical values throughout the six-wave time series
identical_PID7 = rep(NA,nrow(complete_cases_PID7))
for (i in 1:nrow(complete_cases_PID7)){
  num = length(unique(as.list(complete_cases_PID7[i,])))
  if(num==1){
    identical_PID7[i] = 1
  }
  else{
    identical_PID7[i] = 0
  }
}

sum(identical_PID7)/445 #0.294

identical_PID3 = rep(NA,nrow(complete_cases_PID3))
for (i in 1:nrow(complete_cases_PID3)){
  num = length(unique(as.list(complete_cases_PID3[i,])))
  if(num==1){
    identical_PID3[i] = 1
  }
  else{
    identical_PID3[i] = 0
  }
}

sum(identical_PID3)/365 #0.616

##Creating and appending unique ID column
unique_ID_PID7 = as.data.frame(seq(1,nrow(complete_cases_PID7),1))
complete_cases_PID7 = cbind(unique_ID_PID7,complete_cases_PID7)
names(complete_cases_PID7)[1] = 'Unique ID'

unique_ID_PID3 = as.data.frame(seq(1,nrow(complete_cases_PID3),1))
complete_cases_PID3 = cbind(unique_ID_PID3,complete_cases_PID3)
names(complete_cases_PID3)[1] = 'Unique ID'

##Creating long form dataframes
library(tidyr)
all_waves_PID_7_long = gather(complete_cases_PID7,PID_wave,PID_7,S1_PID7:S70_PID7,factor_key=TRUE)
all_waves_PID_7_long = all_waves_PID_7_long[order(all_waves_PID_7_long$`Unique ID`),]

all_waves_PID_3_long = gather(complete_cases_PID3,PID_wave,PID_3,S1_PID3:S70_PID3,factor_key=TRUE)
all_waves_PID_3_long = all_waves_PID_3_long[order(all_waves_PID_3_long$`Unique ID`),]

##Adding wave columns
waves_PID7 = as.data.frame(cbind(rep(seq(0,23,1),nrow(complete_cases_PID7)),rep(c(0,5,7,10,11,12,17,18,20,25,28,31,34,36,38,44,46,50,51,54,61,65,68,74),nrow(complete_cases_PID7))))
names(waves_PID7) <- c('wave_counter','months_since_origin')

waves_PID3 = as.data.frame(cbind(rep(seq(0,23,1),nrow(complete_cases_PID3)),rep(c(0,5,7,10,11,12,17,18,20,25,28,31,34,36,38,44,46,50,51,54,61,65,68,74),nrow(complete_cases_PID3))))
names(waves_PID3) <- c('wave_counter','months_since_origin')

all_waves_PID_7_long = cbind(all_waves_PID_7_long,waves_PID7)
all_waves_PID_3_long = cbind(all_waves_PID_3_long,waves_PID3)

##OLS Regressions
#wave_counter
PID_7_drift_coef = c()
PID_7_drift_p_value = c()
UID_set_PID_7 = c()
for (i in all_waves_PID_7_long$`Unique ID`){
  if (is.element(i,UID_set_PID_7)){
    next
  }
  else{
    df = subset(all_waves_PID_7_long, `Unique ID`==i)
    lm = lm(PID_7 ~ wave_counter, data=df)
    coef = summary(lm)$coefficients[2,1]
    coef2 = summary(lm)$coefficients[2,4]
    PID_7_drift_coef = c(PID_7_drift_coef, coef)
    PID_7_drift_p_value = c(PID_7_drift_p_value,coef2)
    UID_set_PID_7 = c(UID_set_PID_7,i)
  }
}

PID_3_drift_coef = c()
PID_3_drift_p_value = c()
UID_set_PID_3 = c()
for (i in all_waves_PID_3_long$`Unique ID`){
  if (is.element(i,UID_set_PID_3)){
    next
  }
  else{
    df = subset(all_waves_PID_3_long, `Unique ID`==i)
    lm = lm(PID_3 ~ wave_counter, data=df)
    coef = summary(lm)$coefficients[2,1]
    coef2 = summary(lm)$coefficients[2,4]
    PID_3_drift_coef = c(PID_3_drift_coef, coef)
    PID_3_drift_p_value = c(PID_3_drift_p_value,coef2)
    UID_set_PID_3 = c(UID_set_PID_3,i)
  }
}

hist(PID_7_drift_coef, breaks=50)
summary(PID_7_drift_coef)
threshold_val = 1/23 #This is the coefficient value required to drift one-full point over the full time-series.
counter_threshold_PID7 = 0
for(i in PID_7_drift_coef){
  if(is.nan(i)){
    next
  }
  else{
    if(abs(i>=threshold_val)){
      counter_threshold_PID7 = counter_threshold_PID7+1
    }
  }
}

counter_threshold_PID7/nrow(complete_cases_PID7) #.085

hist(PID_7_drift_p_value, breaks=50)
summary(PID_7_drift_p_value)
counter_PID7 = 0
for(i in PID_7_drift_p_value){
  if(is.nan(i)){
    next
  }
  else{
    if(i<0.05){
      counter_PID7 = counter_PID7+1
    }
  }
}

hist(PID_3_drift_coef, breaks=50)
summary(PID_3_drift_coef)
threshold_val = 1/23 #This is the coefficient value required to drift one-full point over the full time-series.
counter_threshold_PID3 = 0
for(i in PID_3_drift_coef){
  if(is.nan(i)){
    next
  }
  else{
    if(abs(i>=threshold_val)){
      counter_threshold_PID3 = counter_threshold_PID3+1
    }
  }
}

counter_threshold_PID3/nrow(complete_cases_PID3) #.022

hist(PID_3_drift_p_value, breaks=50)
summary(PID_3_drift_p_value)
counter_PID3 = 0
for(i in PID_3_drift_p_value){
  if(is.nan(i)){
    next
  }
  else{
    if(i<0.05){
      counter_PID3 = counter_PID3+1
    }
  }
}

#Counting respondents whose coefficients are exactly 0
sum(ifelse(PID_7_drift_coef==0,1,0))/nrow(complete_cases_PID7) #0.000
sum(ifelse(PID_3_drift_coef==0,1,0))/nrow(complete_cases_PID3) #0.110
#THESE NUMBERS ARE CORRECT BUT MISLEADING. THEY EXCLUDE VALUES INFINITESIMALLY DIFFERENT FROM ZERO. USE THE RESULTS CALCULATED ABOVE INSTEAD.

#months_since_origin
PID_7_drift_coef = c()
PID_7_drift_p_value = c()
UID_set_PID_7 = c()
for (i in all_waves_PID_7_long$`Unique ID`){
  if (is.element(i,UID_set_PID_7)){
    next
  }
  else{
    df = subset(all_waves_PID_7_long, `Unique ID`==i)
    lm = lm(PID_7 ~ months_since_origin, data=df)
    coef = summary(lm)$coefficients[2,1]
    coef2 = summary(lm)$coefficients[2,4]
    PID_7_drift_coef = c(PID_7_drift_coef, coef)
    PID_7_drift_p_value = c(PID_7_drift_p_value,coef2)
    UID_set_PID_7 = c(UID_set_PID_7,i)
  }
}

PID_3_drift_coef = c()
PID_3_drift_p_value = c()
UID_set_PID_3 = c()
for (i in all_waves_PID_3_long$`Unique ID`){
  if (is.element(i,UID_set_PID_3)){
    next
  }
  else{
    df = subset(all_waves_PID_3_long, `Unique ID`==i)
    lm = lm(PID_3 ~ months_since_origin, data=df)
    coef = summary(lm)$coefficients[2,1]
    coef2 = summary(lm)$coefficients[2,4]
    PID_3_drift_coef = c(PID_3_drift_coef, coef)
    PID_3_drift_p_value = c(PID_3_drift_p_value,coef2)
    UID_set_PID_3 = c(UID_set_PID_3,i)
  }
}

hist(PID_7_drift_coef, breaks=50)
summary(PID_7_drift_coef)
threshold_val = 1/70 #This is the coefficient value required to drift one-full point over the full time-series.
counter_threshold_PID7 = 0
for(i in PID_7_drift_coef){
  if(is.nan(i)){
    next
  }
  else{
    if(abs(i>=threshold_val)){
      counter_threshold_PID7 = counter_threshold_PID7+1
    }
  }
}

counter_threshold_PID7/nrow(complete_cases_PID7) #.088

hist(PID_7_drift_p_value, breaks=50)
summary(PID_7_drift_p_value)
counter_PID7 = 0
for(i in PID_7_drift_p_value){
  if(is.nan(i)){
    next
  }
  else{
    if(i<0.05){
      counter_PID7 = counter_PID7+1
    }
  }
}

hist(PID_3_drift_coef, breaks=50)
summary(PID_3_drift_coef)
threshold_val = 1/70 #This is the coefficient value required to drift one-full point over the full time-series.
counter_threshold_PID3 = 0
for(i in PID_3_drift_coef){
  if(is.nan(i)){
    next
  }
  else{
    if(abs(i>=threshold_val)){
      counter_threshold_PID3 = counter_threshold_PID3+1
    }
  }
}

counter_threshold_PID3/nrow(complete_cases_PID3) #0.022

hist(PID_3_drift_p_value, breaks=50)
summary(PID_3_drift_p_value)
counter_PID3 = 0
for(i in PID_3_drift_p_value){
  if(is.nan(i)){
    next
  }
  else{
    if(i<0.05){
      counter_PID3 = counter_PID3+1
    }
  }
}

#Counting respondents whose coefficients are exactly 0
sum(ifelse(PID_7_drift_coef==0,1,0))/nrow(complete_cases_PID7) #0.000
sum(ifelse(PID_3_drift_coef==0,1,0))/nrow(complete_cases_PID3) #0.110
#THESE NUMBERS ARE CORRECT BUT MISLEADING. THEY EXCLUDE VALUES INFINITESIMALLY DIFFERENT FROM ZERO. USE THE RESULTS CALCULATED ABOVE INSTEAD.

#Visualizations for paper
library(ggplot2)
PID_7_drift_coef_df = as.data.frame(PID_7_drift_coef)
PID_3_drift_coef_df = as.data.frame(PID_3_drift_coef)

ggplot(PID_7_drift_coef_df, aes(x=PID_7_drift_coef)) +
  geom_histogram(color="black", binwidth=0.025) + 
  ggtitle("") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
  xlab("OLS Coefficients (Slopes)") +
  ylab("Frequency") +
  xlim(-1,1)

#Saving in high resolution
ggsave(filename = "./Results/TAPS_Trajectories_Wave Count_Narrow Bins_090922.png", height = 5.7, width = 15.29, units = c("cm"), device='png', dpi=600)



####Predicting Number of Missing Responses (Respondent-wise)
##24 waves OUTER JOIN
waves2012_13 = merge(taps_2012,taps_2013, by="WUSTLID", all=TRUE)
waves2012_14 = merge(waves2012_13,taps_2014, by="WUSTLID", all=TRUE)
waves2012_15 = merge(waves2012_14,taps_2015, by="WUSTLID", all=TRUE)
waves2012_16 = merge(waves2012_15,taps_2016, by="WUSTLID", all=TRUE)
waves2012_17 = merge(waves2012_16,taps_2017, by="WUSTLID", all=TRUE)
waves_2012_18 = merge(waves2012_17,taps_2018, by="WUSTLID", all=TRUE)

##Removing interim data sets
rm(list = ls()[grepl("waves2012_1", ls())])

##Creating three categories for a dependent variable: missing, not missing, and inapplicable
waves_2012_18$In2012 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2012$WUSTLID,"Yes","No"))
waves_2012_18$In2013 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2013$WUSTLID,"Yes","No"))
waves_2012_18$In2014 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2014$WUSTLID,"Yes","No"))
waves_2012_18$In2015 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2015$WUSTLID,"Yes","No"))
waves_2012_18$In2016 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2016$WUSTLID,"Yes","No"))
waves_2012_18$In2017 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2017$WUSTLID,"Yes","No"))
waves_2012_18$In2018 = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2018$WUSTLID,"Yes","No"))

waves_2012_18$S1_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S1_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S1_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S1_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S5_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S5_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S5_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S5_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S7_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S7_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S7_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S7_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S10_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S10_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S10_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S10_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S11_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S11_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S11_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S11_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S12_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S12_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S12_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S12_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S17_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S17_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S17_PID7) & waves_2012_18$In2013=="Yes","Missing",ifelse(is.na(waves_2012_18$S17_PID7) & waves_2012_18$In2013=="No","Inapplicable",NA))))
waves_2012_18$S18_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S18_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S18_PID7) & waves_2012_18$In2013=="Yes","Missing",ifelse(is.na(waves_2012_18$S18_PID7) & waves_2012_18$In2013=="No","Inapplicable",NA))))
waves_2012_18$S20_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S20_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S20_PID7) & waves_2012_18$In2013=="Yes","Missing",ifelse(is.na(waves_2012_18$S20_PID7) & waves_2012_18$In2013=="No","Inapplicable",NA))))
waves_2012_18$S25_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S25_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S25_PID7) & waves_2012_18$In2013=="Yes","Missing",ifelse(is.na(waves_2012_18$S25_PID7) & waves_2012_18$In2013=="No","Inapplicable",NA))))
waves_2012_18$S28_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S28_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S28_PID7) & waves_2012_18$In2014=="Yes","Missing",ifelse(is.na(waves_2012_18$S28_PID7) & waves_2012_18$In2014=="No","Inapplicable",NA))))
waves_2012_18$S31_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S31_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S31_PID7) & waves_2012_18$In2014=="Yes","Missing",ifelse(is.na(waves_2012_18$S31_PID7) & waves_2012_18$In2014=="No","Inapplicable",NA))))
waves_2012_18$S34_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S34_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S34_PID7) & waves_2012_18$In2014=="Yes","Missing",ifelse(is.na(waves_2012_18$S34_PID7) & waves_2012_18$In2014=="No","Inapplicable",NA))))
waves_2012_18$S36_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S36_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S36_PID7) & waves_2012_18$In2014=="Yes","Missing",ifelse(is.na(waves_2012_18$S36_PID7) & waves_2012_18$In2014=="No","Inapplicable",NA))))
waves_2012_18$S38_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S38_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S38_PID7) & waves_2012_18$In2015=="Yes","Missing",ifelse(is.na(waves_2012_18$S38_PID7) & waves_2012_18$In2015=="No","Inapplicable",NA))))
waves_2012_18$S44_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S44_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S44_PID7) & waves_2012_18$In2015=="Yes","Missing",ifelse(is.na(waves_2012_18$S44_PID7) & waves_2012_18$In2015=="No","Inapplicable",NA))))
waves_2012_18$S46_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S46_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S46_PID7) & waves_2012_18$In2015=="Yes","Missing",ifelse(is.na(waves_2012_18$S46_PID7) & waves_2012_18$In2015=="No","Inapplicable",NA))))
waves_2012_18$S50_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S50_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S50_PID7) & waves_2012_18$In2012=="Yes","Missing",ifelse(is.na(waves_2012_18$S50_PID7) & waves_2012_18$In2012=="No","Inapplicable",NA))))
waves_2012_18$S51_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S51_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S51_PID7) & waves_2012_18$In2016=="Yes","Missing",ifelse(is.na(waves_2012_18$S51_PID7) & waves_2012_18$In2016=="No","Inapplicable",NA))))
waves_2012_18$S54_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S54_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S54_PID7) & waves_2012_18$In2016=="Yes","Missing",ifelse(is.na(waves_2012_18$S54_PID7) & waves_2012_18$In2016=="No","Inapplicable",NA))))
waves_2012_18$S61_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S61_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S61_PID7) & waves_2012_18$In2016=="Yes","Missing",ifelse(is.na(waves_2012_18$S61_PID7) & waves_2012_18$In2016=="No","Inapplicable",NA))))
waves_2012_18$S64_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S64_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S64_PID7) & waves_2012_18$In2017=="Yes","Missing",ifelse(is.na(waves_2012_18$S64_PID7) & waves_2012_18$In2017=="No","Inapplicable",NA))))
waves_2012_18$S66_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S66_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S66_PID7) & waves_2012_18$In2017=="Yes","Missing",ifelse(is.na(waves_2012_18$S66_PID7) & waves_2012_18$In2017=="No","Inapplicable",NA))))
waves_2012_18$S70_PID7_missing = as.factor(ifelse(!is.na(waves_2012_18$S70_PID7),"Not Missing",ifelse(is.na(waves_2012_18$S70_PID7) & waves_2012_18$In2018=="Yes","Missing",ifelse(is.na(waves_2012_18$S70_PID7) & waves_2012_18$In2018=="No","Inapplicable",NA))))

##Creating long form dataframe
#I had to break this data up into three smaller dataframes to preserve memory
waves_2012_18_trimmed_1 = waves_2012_18[,c(1,31593:31601)]
waves_2012_18_trimmed_2 = waves_2012_18[,c(1,31602:31609)]
waves_2012_18_trimmed_3 = waves_2012_18[,c(1,31610:31616)]
library(tidyr)
waves_2012_18_long_1 = gather(waves_2012_18_trimmed_1,Survey_Wave,Missing,S1_PID7_missing:S20_PID7_missing,factor_key=TRUE)
waves_2012_18_long_2 = gather(waves_2012_18_trimmed_2,Survey_Wave,Missing,S25_PID7_missing:S46_PID7_missing,factor_key=TRUE)
waves_2012_18_long_3 = gather(waves_2012_18_trimmed_3,Survey_Wave,Missing,S50_PID7_missing:S70_PID7_missing,factor_key=TRUE)
waves_2012_18_long = rbind(waves_2012_18_long_1,waves_2012_18_long_2,waves_2012_18_long_3)
waves_2012_18_long = waves_2012_18_long[order(waves_2012_18_long$WUSTLID),]
table(waves_2012_18_long$Missing)

##Filter pivoted table to remove inapplicable
waves_2012_18_long_valid_cases = subset(waves_2012_18_long,waves_2012_18_long$Missing!="Inapplicable")
table(waves_2012_18_long_valid_cases$Missing)

waves_2012_18_long_valid_cases$Missing = as.factor(waves_2012_18_long_valid_cases$Missing)
waves_2012_18_long_valid_cases$Missing = relevel(waves_2012_18_long_valid_cases$Missing, ref = "Not Missing")

##Appending demographics
#Seeing how many new respondents come from each wave
waves_2012_18$first_wave = as.factor(ifelse(waves_2012_18$WUSTLID %in% taps_2012$WUSTLID,"2012",ifelse(waves_2012_18$WUSTLID %in% taps_2013$WUSTLID,"2013",ifelse(waves_2012_18$WUSTLID %in% taps_2014$WUSTLID,"2014",ifelse(waves_2012_18$WUSTLID %in% taps_2015$WUSTLID,"2015",ifelse(waves_2012_18$WUSTLID %in% taps_2016$WUSTLID,"2016",ifelse(waves_2012_18$WUSTLID %in% taps_2017$WUSTLID,"2017",ifelse(waves_2012_18$WUSTLID %in% taps_2018$WUSTLID,"2018",NA))))))))
table(waves_2012_18$first_wave)
#Unfortunately, some of the later wave files don't seem to have any demographic columns included, so I have no way of ascertaining demographics for respondents who newly appear in those waves. What follows will have to be missingness analysis among those who started in either of the first two data files.

#Adding 2011 and 2013 demographic predictors (NOTE: I could not find everything in the original 2012 file (which covers 2011), and some of the variables have more complete information in the 2013 file, so I used that where needed)
demographics_2011 = taps_2012[, c('WUSTLID','PPSTATENS1','PPETHMS1','S1_PID7')]

demographics_2011$region = as.factor(ifelse(is.na(demographics_2011$PPSTATENS1),'Missing',ifelse(demographics_2011$PPSTATENS1=="WA"|demographics_2011$PPSTATENS1=="MT"|demographics_2011$PPSTATENS1=="OR"|demographics_2011$PPSTATENS1=="ID"|demographics_2011$PPSTATENS1=="WY"|demographics_2011$PPSTATENS1=="CA"|demographics_2011$PPSTATENS1=="NV"|demographics_2011$PPSTATENS1=="UT"|demographics_2011$PPSTATENS1=="CO"|demographics_2011$PPSTATENS1=="AZ"|demographics_2011$PPSTATENS1=="NM"|demographics_2011$PPSTATENS1=="AK"|demographics_2011$PPSTATENS1=="HI","West",ifelse(demographics_2011$PPSTATENS1=="ND"|demographics_2011$PPSTATENS1=="SD"|demographics_2011$PPSTATENS1=="NE"|demographics_2011$PPSTATENS1=="KS"|demographics_2011$PPSTATENS1=="MN"|demographics_2011$PPSTATENS1=="IA"|demographics_2011$PPSTATENS1=="MO"|demographics_2011$PPSTATENS1=="WI"|demographics_2011$PPSTATENS1=="IL"|demographics_2011$PPSTATENS1=="IN"|demographics_2011$PPSTATENS1=="MI"|demographics_2011$PPSTATENS1=="OH","Midwest",ifelse(demographics_2011$PPSTATENS1=="ME"|demographics_2011$PPSTATENS1=="NH"|demographics_2011$PPSTATENS1=="VT"|demographics_2011$PPSTATENS1=="MA"|demographics_2011$PPSTATENS1=="RI"|demographics_2011$PPSTATENS1=="CT"|demographics_2011$PPSTATENS1=="NY"|demographics_2011$PPSTATENS1=="NJ"|demographics_2011$PPSTATENS1=="PA","Northeast",ifelse(demographics_2011$PPSTATENS1=="DE"|demographics_2011$PPSTATENS1=="MD"|demographics_2011$PPSTATENS1=="DC"|demographics_2011$PPSTATENS1=="VA"|demographics_2011$PPSTATENS1=="WV"|demographics_2011$PPSTATENS1=="KY"|demographics_2011$PPSTATENS1=="NC"|demographics_2011$PPSTATENS1=="TN"|demographics_2011$PPSTATENS1=="SC"|demographics_2011$PPSTATENS1=="GA"|demographics_2011$PPSTATENS1=="AL"|demographics_2011$PPSTATENS1=="MS"|demographics_2011$PPSTATENS1=="FL"|demographics_2011$PPSTATENS1=="AR"|demographics_2011$PPSTATENS1=="LA"|demographics_2011$PPSTATENS1=="OK"|demographics_2011$PPSTATENS1=="TX","South","Missing"))))))
demographics_2011$region = relevel(demographics_2011$region, ref = "Northeast")
table(demographics_2011$PPSTATENS1,demographics_2011$region)

demographics_2011$PPETHMS1 = relevel(demographics_2011$PPETHMS1, ref = "White, Non-Hispanic")

summary(demographics_2011)

demographics_2013 = taps_2013[, c('WUSTLID','gendersp','incomehhldsp','agesp','educsp')]

demographics_2013$gender = as.factor(ifelse(demographics_2013$gendersp=="Female","Female",ifelse(demographics_2013$gendersp=="Male","Male",NA)))
demographics_2013$gender = relevel(demographics_2013$gender, ref = "Male")

demographics_2013$income = sapply(demographics_2013$incomehhldsp, income)
demographics_2013$income_thousands = demographics_2013$income/1000 

demographics_2013$educ = as.factor(ifelse(demographics_2013$educsp=="No formal education"|demographics_2013$educsp=="7th or 8th grade"|demographics_2013$educsp=="9th grade"|demographics_2013$educsp=="10th grade"|demographics_2013$educsp=="11th grade"|demographics_2013$educsp=="12th grade NO DIPLOMA","No HS",ifelse(demographics_2013$educsp=="HIGH SCHOOL GRADUATE - high school DIPLOMA or the equivalent (GED)","High school",ifelse(demographics_2013$educsp=="Some college, but no degree"|demographics_2013$educsp=="Associate degree","Some college",ifelse(demographics_2013$educsp=="Bachelor's degree"|demographics_2013$educsp=="Master's degree"|demographics_2013$educsp=="Professional degree"|demographics_2013$educsp=="Doctorate degree","Bachelor's degree+",NA)))))
demographics_2013$educ = relevel(demographics_2013$educ, ref = "No HS")
table(demographics_2013$educsp,demographics_2013$educ)

summary(demographics_2013)

#Adding two more predictors
demographics_2011$partisan_intensity_S1 = ifelse(demographics_2011$S1_PID7==3|demographics_2011$S1_PID7==-3,2,ifelse(demographics_2011$S1_PID7==2|demographics_2011$S1_PID7==-2,1,ifelse(demographics_2011$S1_PID7==1|demographics_2011$S1_PID7==0|demographics_2011$S1_PID7==-1,0,NA)))
table(demographics_2011$partisan_intensity_S1,demographics_2011$S1_PID7)

demographics_2011$democrat_S1 = ifelse(demographics_2011$S1_PID7<0,1,ifelse(demographics_2011$S1_PID7>0,0,ifelse(demographics_2011$S1_PID7==0,0.5,NA)))
table(demographics_2011$democrat_S1,demographics_2011$S1_PID7)

##Joining PID dataframe and demographic dataframes
waves_2012_18_long_valid_cases = merge(waves_2012_18_long_valid_cases,demographics_2011, by="WUSTLID", all.x=TRUE)
waves_2012_18_long_valid_cases = merge(waves_2012_18_long_valid_cases,demographics_2013, by="WUSTLID", all.x=TRUE)

##Logistic regression equations
summary(glm(Missing ~ gender, family = binomial, data = waves_2012_18_long_valid_cases)) #Female is strongly positively associated with missingness
summary(glm(Missing ~ income_thousands, family = binomial, data = waves_2012_18_long_valid_cases)) #Income is strongly negatively associated with missingness
summary(glm(Missing ~ region, family = binomial, data = waves_2012_18_long_valid_cases)) #The West is negatively associated with missingness compared to the Northeast
summary(glm(Missing ~ PPETHMS1, family = binomial, data = waves_2012_18_long_valid_cases)) #White race is negatively associated with missingness compared to other racial categories
summary(glm(Missing ~ agesp, family = binomial, data = waves_2012_18_long_valid_cases)) #Age is strongly negatively associated with missingness
summary(glm(Missing ~ educ, family = binomial, data = waves_2012_18_long_valid_cases)) #Education is strongly negatively associated with missingness
summary(glm(Missing ~ partisan_intensity_S1, family = binomial, data = waves_2012_18_long_valid_cases)) #Partisan intensity is not associated with missingness
summary(glm(Missing ~ democrat_S1, family = binomial, data = waves_2012_18_long_valid_cases)) #Democratic partisanship is positively associated with missingness

summary(waves_2012_18_long_valid_cases$Missing)



####Predicting Individual-level Standard Deviations [COMPLETE CASES ONLY]
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")

##Adding Means and SDs (Individuals)
complete_cases_PID7$Mean_PID7 = apply(complete_cases_PID7[,2:24], 1, mean)
complete_cases_PID7$SD_PID7 = apply(complete_cases_PID7[,2:24], 1, sd)

##Adding 2011 and 2013 demographic predictors (NOTE: I could not find everything in the original 2012 file (which covers 2011), and some of the variables have more complete information in the 2013 file, so I used that where needed)
#demographics_2011 = taps_2012[, c('GENDERS1','PPSTATENS1','INCOMEHHLDS1','PPETHMS1','AGES1','EDUCS1')]
demographics_2011 = taps_2012[, c('WUSTLID','PPSTATENS1','PPETHMS1')]

demographics_2011$region = as.factor(ifelse(is.na(demographics_2011$PPSTATENS1),'Missing',ifelse(demographics_2011$PPSTATENS1=="WA"|demographics_2011$PPSTATENS1=="MT"|demographics_2011$PPSTATENS1=="OR"|demographics_2011$PPSTATENS1=="ID"|demographics_2011$PPSTATENS1=="WY"|demographics_2011$PPSTATENS1=="CA"|demographics_2011$PPSTATENS1=="NV"|demographics_2011$PPSTATENS1=="UT"|demographics_2011$PPSTATENS1=="CO"|demographics_2011$PPSTATENS1=="AZ"|demographics_2011$PPSTATENS1=="NM"|demographics_2011$PPSTATENS1=="AK"|demographics_2011$PPSTATENS1=="HI","West",ifelse(demographics_2011$PPSTATENS1=="ND"|demographics_2011$PPSTATENS1=="SD"|demographics_2011$PPSTATENS1=="NE"|demographics_2011$PPSTATENS1=="KS"|demographics_2011$PPSTATENS1=="MN"|demographics_2011$PPSTATENS1=="IA"|demographics_2011$PPSTATENS1=="MO"|demographics_2011$PPSTATENS1=="WI"|demographics_2011$PPSTATENS1=="IL"|demographics_2011$PPSTATENS1=="IN"|demographics_2011$PPSTATENS1=="MI"|demographics_2011$PPSTATENS1=="OH","Midwest",ifelse(demographics_2011$PPSTATENS1=="ME"|demographics_2011$PPSTATENS1=="NH"|demographics_2011$PPSTATENS1=="VT"|demographics_2011$PPSTATENS1=="MA"|demographics_2011$PPSTATENS1=="RI"|demographics_2011$PPSTATENS1=="CT"|demographics_2011$PPSTATENS1=="NY"|demographics_2011$PPSTATENS1=="NJ"|demographics_2011$PPSTATENS1=="PA","Northeast",ifelse(demographics_2011$PPSTATENS1=="DE"|demographics_2011$PPSTATENS1=="MD"|demographics_2011$PPSTATENS1=="DC"|demographics_2011$PPSTATENS1=="VA"|demographics_2011$PPSTATENS1=="WV"|demographics_2011$PPSTATENS1=="KY"|demographics_2011$PPSTATENS1=="NC"|demographics_2011$PPSTATENS1=="TN"|demographics_2011$PPSTATENS1=="SC"|demographics_2011$PPSTATENS1=="GA"|demographics_2011$PPSTATENS1=="AL"|demographics_2011$PPSTATENS1=="MS"|demographics_2011$PPSTATENS1=="FL"|demographics_2011$PPSTATENS1=="AR"|demographics_2011$PPSTATENS1=="LA"|demographics_2011$PPSTATENS1=="OK"|demographics_2011$PPSTATENS1=="TX","South","Missing"))))))
demographics_2011$region = relevel(demographics_2011$region, ref = "Northeast")
table(demographics_2011$PPSTATENS1,demographics_2011$region)

demographics_2011$PPETHMS1 = relevel(demographics_2011$PPETHMS1, ref = "White, Non-Hispanic")

summary(demographics_2011)

demographics_2013 = taps_2013[, c('WUSTLID','gendersp','incomehhldsp','agesp','educsp')]

demographics_2013$gender = as.factor(ifelse(demographics_2013$gendersp=="Female","Female",ifelse(demographics_2013$gendersp=="Male","Male",NA)))
demographics_2013$gender = relevel(demographics_2013$gender, ref = "Male")

demographics_2013$income = sapply(demographics_2013$incomehhldsp, income)
demographics_2013$income_thousands = demographics_2013$income/1000 

demographics_2013$educ = as.factor(ifelse(demographics_2013$educsp=="No formal education"|demographics_2013$educsp=="7th or 8th grade"|demographics_2013$educsp=="9th grade"|demographics_2013$educsp=="10th grade"|demographics_2013$educsp=="11th grade"|demographics_2013$educsp=="12th grade NO DIPLOMA","No HS",ifelse(demographics_2013$educsp=="HIGH SCHOOL GRADUATE - high school DIPLOMA or the equivalent (GED)","High school",ifelse(demographics_2013$educsp=="Some college, but no degree"|demographics_2013$educsp=="Associate degree","Some college",ifelse(demographics_2013$educsp=="Bachelor's degree"|demographics_2013$educsp=="Master's degree"|demographics_2013$educsp=="Professional degree"|demographics_2013$educsp=="Doctorate degree","Bachelor's degree+",NA)))))
demographics_2013$educ = relevel(demographics_2013$educ, ref = "No HS")
table(demographics_2013$educsp,demographics_2013$educ)

summary(demographics_2013)

##Joining PID dataframe and demographic dataframes
complete_cases_PID7 = merge(complete_cases_PID7,demographics_2011, by="WUSTLID", all.x=TRUE)
complete_cases_PID7 = merge(complete_cases_PID7,demographics_2013, by="WUSTLID", all.x=TRUE)

##Creating two more predictors
complete_cases_PID7$partisan_intensity_S1 = ifelse(complete_cases_PID7$S1_PID7==3|complete_cases_PID7$S1_PID7==-3,2,ifelse(complete_cases_PID7$S1_PID7==2|complete_cases_PID7$S1_PID7==-2,1,ifelse(complete_cases_PID7$S1_PID7==1|complete_cases_PID7$S1_PID7==0|complete_cases_PID7$S1_PID7==-1,0,NA)))
table(complete_cases_PID7$partisan_intensity_S1,complete_cases_PID7$S1_PID7)

complete_cases_PID7$democrat_S1 = ifelse(complete_cases_PID7$S1_PID7<0,1,ifelse(complete_cases_PID7$S1_PID7>0,0,ifelse(complete_cases_PID7$S1_PID7==0,0.5,NA)))
table(complete_cases_PID7$democrat_S1,complete_cases_PID7$S1_PID7)

##OLS regression equations
summary(lm(SD_PID7 ~ gender, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ income_thousands, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ region, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ PPETHMS1, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ agesp, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ educ, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ partisan_intensity_S1, data = complete_cases_PID7))
summary(lm(SD_PID7 ~ democrat_S1, data = complete_cases_PID7))

summary(complete_cases_PID7$SD_PID7)
hist(complete_cases_PID7$SD_PID7)

mean(complete_cases_PID7$SD_PID7)



####OLS and IV Regressions for Coefficients Table
###Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

##OLS Regressions
summary(lm(S5_PID7 ~ S1_PID7, data = complete_cases_PID7))
summary(lm(S7_PID7 ~ S5_PID7, data = complete_cases_PID7))
summary(lm(S10_PID7 ~ S7_PID7, data = complete_cases_PID7))
summary(lm(S11_PID7 ~ S10_PID7, data = complete_cases_PID7))
summary(lm(S12_PID7 ~ S11_PID7, data = complete_cases_PID7))
summary(lm(S17_PID7 ~ S12_PID7, data = complete_cases_PID7))
summary(lm(S18_PID7 ~ S17_PID7, data = complete_cases_PID7))
summary(lm(S20_PID7 ~ S18_PID7, data = complete_cases_PID7))
summary(lm(S25_PID7 ~ S20_PID7, data = complete_cases_PID7))
summary(lm(S28_PID7 ~ S25_PID7, data = complete_cases_PID7))
summary(lm(S31_PID7 ~ S28_PID7, data = complete_cases_PID7))
summary(lm(S34_PID7 ~ S31_PID7, data = complete_cases_PID7))
summary(lm(S36_PID7 ~ S34_PID7, data = complete_cases_PID7))
summary(lm(S38_PID7 ~ S36_PID7, data = complete_cases_PID7))
summary(lm(S44_PID7 ~ S38_PID7, data = complete_cases_PID7))
summary(lm(S46_PID7 ~ S44_PID7, data = complete_cases_PID7))
summary(lm(S50_PID7 ~ S46_PID7, data = complete_cases_PID7))
summary(lm(S51_PID7 ~ S50_PID7, data = complete_cases_PID7))
summary(lm(S54_PID7 ~ S51_PID7, data = complete_cases_PID7))
summary(lm(S61_PID7 ~ S54_PID7, data = complete_cases_PID7))
summary(lm(S64_PID7 ~ S61_PID7, data = complete_cases_PID7))
summary(lm(S66_PID7 ~ S64_PID7, data = complete_cases_PID7))
summary(lm(S70_PID7 ~ S66_PID7, data = complete_cases_PID7))

##IV Regressions
library('ivreg')
summary(ivreg(S7_PID7 ~ S5_PID7, ~ S1_PID7, data = complete_cases_PID7))
summary(ivreg(S10_PID7 ~ S7_PID7, ~ S5_PID7, data = complete_cases_PID7))
summary(ivreg(S11_PID7 ~ S10_PID7, ~ S7_PID7, data = complete_cases_PID7))
summary(ivreg(S12_PID7 ~ S11_PID7, ~ S10_PID7, data = complete_cases_PID7))
summary(ivreg(S17_PID7 ~ S12_PID7, ~ S11_PID7, data = complete_cases_PID7))
summary(ivreg(S18_PID7 ~ S17_PID7, ~ S12_PID7, data = complete_cases_PID7))
summary(ivreg(S20_PID7 ~ S18_PID7, ~ S17_PID7, data = complete_cases_PID7))
summary(ivreg(S25_PID7 ~ S20_PID7, ~ S18_PID7, data = complete_cases_PID7))
summary(ivreg(S28_PID7 ~ S25_PID7, ~ S20_PID7, data = complete_cases_PID7))
summary(ivreg(S31_PID7 ~ S28_PID7, ~ S25_PID7, data = complete_cases_PID7))
summary(ivreg(S34_PID7 ~ S31_PID7, ~ S28_PID7, data = complete_cases_PID7))
summary(ivreg(S36_PID7 ~ S34_PID7, ~ S31_PID7, data = complete_cases_PID7))
summary(ivreg(S38_PID7 ~ S36_PID7, ~ S34_PID7, data = complete_cases_PID7))
summary(ivreg(S44_PID7 ~ S38_PID7, ~ S36_PID7, data = complete_cases_PID7))
summary(ivreg(S46_PID7 ~ S44_PID7, ~ S38_PID7, data = complete_cases_PID7))
summary(ivreg(S50_PID7 ~ S46_PID7, ~ S44_PID7, data = complete_cases_PID7))
summary(ivreg(S51_PID7 ~ S50_PID7, ~ S46_PID7, data = complete_cases_PID7))
summary(ivreg(S54_PID7 ~ S51_PID7, ~ S50_PID7, data = complete_cases_PID7))
summary(ivreg(S61_PID7 ~ S54_PID7, ~ S51_PID7, data = complete_cases_PID7))
summary(ivreg(S64_PID7 ~ S61_PID7, ~ S54_PID7, data = complete_cases_PID7))
summary(ivreg(S66_PID7 ~ S64_PID7, ~ S61_PID7, data = complete_cases_PID7))
summary(ivreg(S70_PID7 ~ S66_PID7, ~ S64_PID7, data = complete_cases_PID7))

##Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares = rep(NA,21) 
for (i in 2:22){
  ww1970_r_squares[i-1] = wiley_r_squared(complete_cases_PID7,i)
}

ww1970_r_squares



####Cross-Panel Analysis
###TAPS VS. ISCAP
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_11_12 = taps_2012[,c("WUSTLID","S11_PID7","S12_PID7")]
waves_PID7_11_12 = waves_PID7_11_12[rowSums(is.na(waves_PID7_11_12))==0,]
waves_PID7_46 = taps_2015[,c("WUSTLID","S46_PID7")]
waves_PID7_46 = waves_PID7_46[rowSums(is.na(waves_PID7_46))==0,]
waves_PID7_11_12_46 = merge(waves_PID7_11_12,waves_PID7_46, by="WUSTLID")
waves_PID7_61 = taps_2016[,c("WUSTLID","S61_PID7")]
waves_PID7_61 = waves_PID7_61[rowSums(is.na(waves_PID7_61))==0,]
waves_PID7_11_12_46_61 = merge(waves_PID7_11_12_46,waves_PID7_61, by="WUSTLID")
complete_cases_PID7_TAPS_vs_ISCAP = waves_PID7_11_12_46_61[,2:5]

##Computing statistics
#Correlations
cor_PID7_listwise = round(cor(complete_cases_PID7_TAPS_vs_ISCAP),3)

lower.tri(cor_PID7_listwise, diag = FALSE)
upper.tri(cor_PID7_listwise, diag = FALSE)
lower_7_listwise<-cor_PID7_listwise
lower_7_listwise[lower.tri(cor_PID7_listwise, diag=TRUE)]=""
lower_7_listwise<-as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./TAPS_vs_ISCAP_PID7_correlations.csv")

#Means and SDs (Individuals)
complete_cases_PID7_TAPS_vs_ISCAP$Mean_PID7 = apply(complete_cases_PID7_TAPS_vs_ISCAP[,1:4], 1, mean)
complete_cases_PID7_TAPS_vs_ISCAP$SD_PID7 = apply(complete_cases_PID7_TAPS_vs_ISCAP[,1:4], 1, sd)

summary(complete_cases_PID7_TAPS_vs_ISCAP$Mean_PID7)
summary(complete_cases_PID7_TAPS_vs_ISCAP$SD_PID7)

#Means and SDs (Waves)
as.data.frame(round(apply(complete_cases_PID7_TAPS_vs_ISCAP, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(complete_cases_PID7_TAPS_vs_ISCAP, 2, sd, na.rm=TRUE),2))

##Time Series and IV Regressions
#OLS Regressions
summary(lm(S12_PID7 ~ S11_PID7, data = complete_cases_PID7_TAPS_vs_ISCAP))
summary(lm(S46_PID7 ~ S12_PID7, data = complete_cases_PID7_TAPS_vs_ISCAP))
summary(lm(S61_PID7 ~ S46_PID7, data = complete_cases_PID7_TAPS_vs_ISCAP))

#IV Regressions
library('ivreg')
summary(ivreg(S46_PID7 ~ S12_PID7, ~ S11_PID7, data = complete_cases_PID7_TAPS_vs_ISCAP))
summary(ivreg(S61_PID7 ~ S46_PID7, ~ S12_PID7, data = complete_cases_PID7_TAPS_vs_ISCAP))

#Computing Implied Wiley-Wiley (1970) R^2s
wiley_r_squared(complete_cases_PID7_TAPS_vs_ISCAP,2)

###TAPS VS. VSG
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_1_12 = taps_2012[,c("WUSTLID","S1_PID7","S12_PID7")]
waves_PID7_1_12 = waves_PID7_1_12[rowSums(is.na(waves_PID7_1_12))==0,]
waves_PID7_61 = taps_2016[,c("WUSTLID","S61_PID7")]
waves_PID7_61 = waves_PID7_61[rowSums(is.na(waves_PID7_61))==0,]
waves_PID7_1_12_61 = merge(waves_PID7_1_12,waves_PID7_61, by="WUSTLID")
waves_PID7_66 = taps_2017[,c("WUSTLID","S66_PID7")]
waves_PID7_66 = waves_PID7_66[rowSums(is.na(waves_PID7_66))==0,]
waves_PID7_1_12_61_66 = merge(waves_PID7_1_12_61,waves_PID7_66, by="WUSTLID")
complete_cases_PID7_TAPS_vs_VSG = waves_PID7_1_12_61_66[,2:5]

##Computing statistics
#Correlations
cor_PID7_listwise = round(cor(complete_cases_PID7_TAPS_vs_VSG),3)

lower.tri(cor_PID7_listwise, diag = FALSE)
upper.tri(cor_PID7_listwise, diag = FALSE)
lower_7_listwise<-cor_PID7_listwise
lower_7_listwise[lower.tri(cor_PID7_listwise, diag=TRUE)]=""
lower_7_listwise<-as.data.frame(lower_7_listwise)
lower_7_listwise
write.csv(lower_7_listwise, "./TAPS_vs_VSG_PID7_correlations.csv")

#Means and SDs (Individuals)
complete_cases_PID7_TAPS_vs_VSG$Mean_PID7 = apply(complete_cases_PID7_TAPS_vs_VSG[,1:4], 1, mean)
complete_cases_PID7_TAPS_vs_VSG$SD_PID7 = apply(complete_cases_PID7_TAPS_vs_VSG[,1:4], 1, sd)

summary(complete_cases_PID7_TAPS_vs_VSG$Mean_PID7)
summary(complete_cases_PID7_TAPS_vs_VSG$SD_PID7)

#Means and SDs (Waves)
as.data.frame(round(apply(complete_cases_PID7_TAPS_vs_VSG, 2, mean, na.rm=TRUE),2))
as.data.frame(round(apply(complete_cases_PID7_TAPS_vs_VSG, 2, sd, na.rm=TRUE),2))

##Time Series and IV Regressions
#OLS Regressions
summary(lm(S12_PID7 ~ S1_PID7, data = complete_cases_PID7_TAPS_vs_VSG))
summary(lm(S61_PID7 ~ S12_PID7, data = complete_cases_PID7_TAPS_vs_VSG))
summary(lm(S66_PID7 ~ S61_PID7, data = complete_cases_PID7_TAPS_vs_VSG))

#IV Regressions
library('ivreg')
summary(ivreg(S61_PID7 ~ S12_PID7, ~ S1_PID7, data = complete_cases_PID7_TAPS_vs_VSG))
summary(ivreg(S66_PID7 ~ S61_PID7, ~ S12_PID7, data = complete_cases_PID7_TAPS_vs_VSG))

#Computing Implied Wiley-Wiley (1970) R^2s
wiley_r_squared(complete_cases_PID7_TAPS_vs_VSG,2)



####Six Presentation Waves
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

##Filtering by columns
all_waves_PID7 = complete_cases_PID7[, c('S1_PID7','S11_PID7','S25_PID7','S38_PID7','S54_PID7','S70_PID7')]

##Time Series and IV Regressions
#OLS Regressions
summary(lm(S11_PID7 ~ S1_PID7, data = all_waves_PID7))
summary(lm(S25_PID7 ~ S11_PID7, data = all_waves_PID7))
summary(lm(S38_PID7 ~ S25_PID7, data = all_waves_PID7))
summary(lm(S54_PID7 ~ S38_PID7, data = all_waves_PID7))
summary(lm(S70_PID7 ~ S54_PID7, data = all_waves_PID7))

#IV Regressions
library('ivreg')
summary(ivreg(S25_PID7 ~ S11_PID7, ~ S1_PID7, data = all_waves_PID7))
summary(ivreg(S38_PID7 ~ S25_PID7, ~ S11_PID7, data = all_waves_PID7))
summary(ivreg(S54_PID7 ~ S38_PID7, ~ S25_PID7, data = all_waves_PID7))
summary(ivreg(S70_PID7 ~ S54_PID7, ~ S38_PID7, data = all_waves_PID7))

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares = rep(NA,3) 
for (i in 2:4){
  ww1970_r_squares[i-1] = wiley_r_squared(all_waves_PID7,i)
}

ww1970_r_squares



####Six Presentation Waves [INCLUDING WEIGHTS]
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","nov2011wt2S1","S5_PID7","S7_PID7","S10_PID7","S11_PID7","oct2012wt2S11","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7","dec2013wt2S25")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","jan2015wt2S38","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","may2016wt2S54","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7","jan2018wt2S70")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:31]

##Filtering by columns
all_waves_PID7 = complete_cases_PID7[, c('S1_PID7','S11_PID7','S25_PID7','S38_PID7','S54_PID7','S70_PID7',"nov2011wt2S1","oct2012wt2S11","dec2013wt2S25","jan2015wt2S38","may2016wt2S54","jan2018wt2S70")]

##IV Regressions
library('ivreg')
summary(ivreg(S25_PID7 ~ S11_PID7, ~ S1_PID7, data = all_waves_PID7))
summary(ivreg(S25_PID7 ~ S11_PID7, ~ S1_PID7, data = all_waves_PID7, weights = oct2012wt2S11))

summary(ivreg(S38_PID7 ~ S25_PID7, ~ S11_PID7, data = all_waves_PID7))
summary(ivreg(S38_PID7 ~ S25_PID7, ~ S11_PID7, data = all_waves_PID7, weights = dec2013wt2S25))

summary(ivreg(S54_PID7 ~ S38_PID7, ~ S25_PID7, data = all_waves_PID7))
summary(ivreg(S54_PID7 ~ S38_PID7, ~ S25_PID7, data = all_waves_PID7, weights = jan2015wt2S38))

summary(ivreg(S70_PID7 ~ S54_PID7, ~ S38_PID7, data = all_waves_PID7))
summary(ivreg(S70_PID7 ~ S54_PID7, ~ S38_PID7, data = all_waves_PID7, weights = may2016wt2S54))



####R^2 Over Time, First Wave Anchored
###TAPS
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","S5_PID7","S7_PID7","S10_PID7","S11_PID7","S12_PID7")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","S18_PID7","S20_PID7","S25_PID7")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","S31_PID7","S34_PID7","S36_PID7")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","S44_PID7","S46_PID7")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","S51_PID7","S54_PID7","S61_PID7")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","S66_PID7")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
complete_cases_PID7 = complete_cases_PID7[,2:25]

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares_first_wave_anchored = rep(NA,21) 
for (inc in 0:20){
  ww1970_r_squares_first_wave_anchored[inc+1] = wiley_r_squared_first_wave_anchored(complete_cases_PID7,2,inc)
}

ww1970_r_squares_first_wave_anchored

#Appending to data set
wiley_scatter_plot = data.frame(x=as.factor("TAPS"),y=as.numeric(ww1970_r_squares_first_wave_anchored))
wiley_scatter_plot$num_months = as.numeric(c(7,10,11,12,17,18,20,25,28,31,34,36,38,44,46,50,51,54,61,65,68))

###VSG
##Creating data set [COMPLETE CASES ONLY]
#Filtering by columns
all_waves_PID7 = vsg[, c('pid7_2011','pid7_2012','pid7_2016','pid7_2017','pid7_2018','pid7_2019')]

#Converting values to new scale and formatting as numeric
all_waves_PID7 = convert_to_numeric_PID7(all_waves_PID7)
#Filtering by rows
all_waves_PID7 = all_waves_PID7[rowSums(is.na(all_waves_PID7))==0,]

summary(rowSums(is.na(all_waves_PID7))) #Checking that the correct type of cases remain

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares_first_wave_anchored_vsg = rep(NA,3) 
for (inc in 0:2){
  ww1970_r_squares_first_wave_anchored_vsg[inc+1] = wiley_r_squared_first_wave_anchored(all_waves_PID7,2,inc)
}

ww1970_r_squares_first_wave_anchored_vsg

#Appending to data set
wiley_scatter_plot_vsg = data.frame(x=as.factor("VSG"),y=as.numeric(ww1970_r_squares_first_wave_anchored_vsg))
wiley_scatter_plot_vsg$num_months = as.numeric(c(60,67,76))

wiley_scatter_plot = rbind(wiley_scatter_plot,wiley_scatter_plot_vsg)

###ISCAP
##Creating data set [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
waves67 = merge(wave6,wave7, by="MNO")
waves6710 = merge(waves67,wave10, by="MNO")
waves6710_11 = merge(waves6710,wave11, by="MNO")
waves6710_12 = merge(waves6710_11,wave12, by="MNO")
waves6710_13 = merge(waves6710_12,wave13, by.x="MNO", by.y="mno")
waves6710_14 = merge(waves6710_13,wave14, by.x="MNO", by.y="mno")
waves6710_15 = merge(waves6710_14,wave15, by.x="MNO", by.y="mno")

##Filtering by rows
#Removing if PID7 value is not applicable
waves6710_15 = waves6710_15[waves6710_15$PID7_w6 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w7 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w12 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w14 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w15 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]

waves6710_15 = waves6710_15[waves6710_15$PID7_w10_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w11_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w13 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w14_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]

#Removing if pre-survey partisanship measure is not within the defined date range
waves6710_15 = waves6710_15[waves6710_15$pppadate_w10>=20150728 & waves6710_15$pppadate_w10<=20150903,] #The cut points could be extended to include more subjects
waves6710_15 = waves6710_15[waves6710_15$pppadate_w11>=20160715 & waves6710_15$pppadate_w11<=20160827,] #The cut points could be extended to include more subjects
waves6710_15 = waves6710_15[waves6710_15$pppadate_w14>=20190611 & waves6710_15$pppadate_w14<=20190820,] #The cut points could be extended to include more subjects

##Filtering by columns
all_waves_PID7 = waves6710_15[, c('PID7_w6','PID7_w7','PID7_w10_pre','PID7_w11_pre','PID7_w12','PID7_w13','PID7_w14_pre','PID7_w14','PID7_w15')]
all_waves_PID7$PID7_w10_pre = as.character(all_waves_PID7$PID7_w10_pre)
all_waves_PID7$PID7_w11_pre = as.character(all_waves_PID7$PID7_w11_pre)
all_waves_PID7$PID7_w13 = as.character(all_waves_PID7$PID7_w13)
all_waves_PID7$PID7_w14_pre = as.character(all_waves_PID7$PID7_w14_pre)

##Converting categorical values to numeric
all_waves_PID7 = convert_to_numeric(all_waves_PID7)
all_waves_PID7 = convert_to_numeric_2(all_waves_PID7)

summary(rowSums(is.na(all_waves_PID7))) #Checking that the correct type of cases remain

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares_first_wave_anchored_iscap = rep(NA,6) 
for (inc in 0:5){
  ww1970_r_squares_first_wave_anchored_iscap[inc+1] = wiley_r_squared_first_wave_anchored(all_waves_PID7,2,inc)
}

ww1970_r_squares_first_wave_anchored_iscap

#Appending to data set
wiley_scatter_plot_iscap = data.frame(x=as.factor("ISCAP"),y=as.numeric(ww1970_r_squares_first_wave_anchored_iscap))
wiley_scatter_plot_iscap$num_months = as.numeric(c(34,46,49,72,81,87))

wiley_scatter_plot = rbind(wiley_scatter_plot,wiley_scatter_plot_iscap)


###Creating Scatter Plot with Regression Line
wiley_scatter_plot[1,2] = 1.00
wiley_scatter_plot$num_years = wiley_scatter_plot$num_months/12
wiley_scatter_plot$years_log_scale = log(wiley_scatter_plot$num_months/12)
wiley_scatter_plot$inverse_log_years = exp(wiley_scatter_plot$years_log_scale)
wiley_scatter_plot$rate_of_decay = 0.975^(wiley_scatter_plot$inverse_log_years)
wiley_scatter_plot$log_R2 = log(wiley_scatter_plot$y)
wiley_scatter_plot$logged_rate_of_decay = log(wiley_scatter_plot$rate_of_decay)

lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot) #Used for manual entry into the final line of the ggplot command

#Writing to .csv
write.csv(wiley_scatter_plot,"./wiley_scatter_plot_data.csv", row.names = TRUE)

#Reading .csv
wiley_scatter_plot =read.csv("./wiley_scatter_plot_data.csv")

###Adding in Green and Palmquist (1994) results
wiley_scatter_plot_gp1994 = as.data.frame(matrix(nrow=14,ncol=3,dimnames=list(seq(1,14,1),c("x","y","num_months"))))
wiley_scatter_plot_gp1994$x = as.factor(c(rep("ANES-56-58-60",3),rep("ANES-72-74-76",2),rep("ANES-80",3),rep("ANES-84-85",2),rep("ANES-90-91-92",2),rep("Parent",2)))
#wiley_scatter_plot_gp1994$y = as.numeric(c(.917,.936,.918,.942,.968,.962,.977,.950,.976,1,1,.866,.891,.963))
#wiley_scatter_plot_gp1994$num_months = c(24,24,3,24,24,5,3,3,3,14,7,15,96,108)
wiley_scatter_plot_gp1994$y = as.numeric(c(.917,.917*.936,.917*.936*.918,.942,.942*.968,.962,.962*.977,.962*.977*.950,.976,.976*1,1,1*.866,.891,.891*.963))
wiley_scatter_plot_gp1994$num_months = c(24,48,51,24,48,5,8,11,3,17,7,22,96,204)
wiley_scatter_plot_gp1994$num_years = wiley_scatter_plot_gp1994$num_months/12
wiley_scatter_plot_gp1994$years_log_scale = log(wiley_scatter_plot_gp1994$num_months/12)
wiley_scatter_plot_gp1994$inverse_log_years = exp(wiley_scatter_plot_gp1994$years_log_scale)
wiley_scatter_plot_gp1994$rate_of_decay = 0.975^(wiley_scatter_plot_gp1994$inverse_log_years)
wiley_scatter_plot_gp1994$log_R2 = log(wiley_scatter_plot_gp1994$y)
wiley_scatter_plot_gp1994$logged_rate_of_decay = log(wiley_scatter_plot_gp1994$rate_of_decay)

lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_gp1994) #Used for manual entry into the final line of the ggplot command

wiley_scatter_plot_total = rbind(wiley_scatter_plot,wiley_scatter_plot_gp1994)

#Writing to .csv
write.csv(wiley_scatter_plot_total,"./wiley_scatter_plot_data_includes_gp1994.csv", row.names = TRUE)

#Reading .csv
wiley_scatter_plot_total = read.csv("./wiley_scatter_plot_data_includes_gp1994.csv")


##Unweighted plot
ggplot(wiley_scatter_plot_total, aes(x=years_log_scale, y=log_R2)) +
  geom_point(aes(shape=x)) +
  #stat_smooth(method=lm, se=FALSE, color="black") + #Overall slope for all data
  scale_shape_manual(values=c(15,16,17,0,1,2,3,4,5)) +
  labs(x="Logged Years", y = bquote("Logged Wiley-Wiley" ~R^2)) +
  ggtitle("") +
  theme_classic() +
  theme(legend.title = element_blank(), legend.position="bottom") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(plot.title = element_text(size = 16)) + 
  theme(axis.title = element_text(size = 12)) +
  theme(legend.text = element_text(size = 12)) + 
  geom_abline(intercept = -0.02509, slope = -0.05881, color="gray", linetype="solid", size=1) + #Slope for three main panels
  geom_abline(intercept = -0.07092, slope = -0.03813, color="gray", linetype="dashed", size=1) + #Slope for G&P 1994 panels
  scale_y_continuous(
    # Add a second axis and specify its features
    sec.axis = sec_axis(trans=~exp(.), name=bquote("Wiley-Wiley" ~R^2)))

#Saving in high resolution
ggsave(filename = "./Wiley_Scatter_Plot_Graphic_Unweighted_090922.png", device='png', width=6.696667, height=3.53, units=c("in"), dpi=600)



####R^2 Over Time, First Wave Anchored [WEIGHTED]
###TAPS
##Creating data set [COMPLETE CASES ONLY]
#PID7
waves_PID7_2012 = taps_2012[,c("WUSTLID","S1_PID7","nov2011wt2S1","S5_PID7","apr2012wt2S5","S7_PID7","jun2012wt2S7","S10_PID7","sep2012wt2S10","S11_PID7","oct2012wt2S11","S12_PID7","nov2012wt2S12")]
waves_PID7_2012 = waves_PID7_2012[rowSums(is.na(waves_PID7_2012))==0,]
waves_PID7_2013 = taps_2013[,c("WUSTLID","S17_PID7","apr2013wt2S17","S18_PID7","may2013wt2S18","S20_PID7","jul2013wt2S20","S25_PID7","dec2013wt2S25")]
waves_PID7_2013 = waves_PID7_2013[rowSums(is.na(waves_PID7_2013))==0,]
waves_PID7_2012_2013 = merge(waves_PID7_2012,waves_PID7_2013, by="WUSTLID")
waves_PID7_2014 = taps_2014[,c("WUSTLID","S28_PID7","mar2014wt2S28","S31_PID7","jun2014wt2S31","S34_PID7","sep2014wt2S34","S36_PID7","nov2014wt2S36")]
waves_PID7_2014 = waves_PID7_2014[rowSums(is.na(waves_PID7_2014))==0,]
waves_PID7_2012_2014 = merge(waves_PID7_2012_2013,waves_PID7_2014, by="WUSTLID")
waves_PID7_2015 = taps_2015[,c("WUSTLID","S38_PID7","jan2015wt2S38","S44_PID7","jul2015wt2S44","S46_PID7","sep2015wt2S46")]
waves_PID7_2015 = waves_PID7_2015[rowSums(is.na(waves_PID7_2015))==0,]
waves_PID7_2012_2015 = merge(waves_PID7_2012_2014,waves_PID7_2015, by="WUSTLID")
waves_PID7_2016 = taps_2016[,c("WUSTLID","S50_PID7","jan2016wt2S50","S51_PID7","feb2016wt2S51","S54_PID7","may2016wt2S54","S61_PID7","dec2016wt2S61")]
waves_PID7_2016 = waves_PID7_2016[rowSums(is.na(waves_PID7_2016))==0,]
waves_PID7_2012_2016 = merge(waves_PID7_2012_2015,waves_PID7_2016, by="WUSTLID")
waves_PID7_2017 = taps_2017[,c("WUSTLID","S64_PID7","apr2017wt2S64","S66_PID7","jul2017wt2S66")]
waves_PID7_2017 = waves_PID7_2017[rowSums(is.na(waves_PID7_2017))==0,]
waves_PID7_2012_2017 = merge(waves_PID7_2012_2016,waves_PID7_2017, by="WUSTLID")
waves_PID7_2018 = taps_2018[,c("WUSTLID","S70_PID7","jan2018wt2S70")]
waves_PID7_2018 = waves_PID7_2018[rowSums(is.na(waves_PID7_2018))==0,]
complete_cases_PID7 = merge(waves_PID7_2012_2017,waves_PID7_2018, by="WUSTLID")
weights = complete_cases_PID7[,seq(3,49,2)]
complete_cases_PID7 = complete_cases_PID7[,seq(2,49,2)]

#Vector of weights
weights_TAPS = list(seq(1,20,1),weights)

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares_first_wave_anchored_weighted = rep(NA,21) 
for (inc in 0:length(weights_TAPS[1][[1]])){
  ww1970_r_squares_first_wave_anchored_weighted[inc+1] = wiley_r_squared_first_wave_anchored_weighted(complete_cases_PID7,2,inc,weights_TAPS[2][[1]][[inc+1]])
}

ww1970_r_squares_first_wave_anchored_weighted

#Appending to data set
wiley_scatter_plot_weighted = data.frame(x=as.factor("TAPS"),y=as.numeric(ww1970_r_squares_first_wave_anchored_weighted))
wiley_scatter_plot_weighted$num_months = as.numeric(c(7,10,11,12,17,18,20,25,28,31,34,36,38,44,46,50,51,54,61,65,68))

###VSG
###Creating data set [COMPLETE CASES ONLY]
##Filtering by columns
all_waves_PID7 = vsg[, c('pid7_2011','pid7_2012','pid7_2016','pid7_2017','pid7_2018','pid7_2019')]

##Converting values to new scale and formatting as numeric
all_waves_PID7 = convert_to_numeric_PID7(all_waves_PID7)

##Creating a filter label
all_waves_PID7$filter = ifelse(rowSums(is.na(all_waves_PID7))==0,1,0)

##Appending a weight for complete case respondents (see 2019 dfvsg_Guide PDF)
weights = vsg[,c('weight_2016','weight_2017','weight_panel_2018','weight_2019')]

for(i in 1:nrow(weights)){
  for(j in 1:ncol(weights)){
    weights[i,j] = ifelse(is.na(weights[i,j]),1,weights[i,j])
  }
}

all_waves_PID7 = cbind(all_waves_PID7,weights)

##Filtering by rows
all_waves_PID7 = subset(all_waves_PID7, filter==1)

summary(rowSums(is.na(all_waves_PID7))) #Checking that the correct type of cases remain

#Vector of weights
weights = all_waves_PID7[,8:11]
all_waves_PID7 = all_waves_PID7[,1:6]

weights_VSG = list(seq(1,2,1),weights)

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares_first_wave_anchored_vsg_weighted = rep(NA,3) 
for (inc in 0:length(weights_VSG[1][[1]])){
  ww1970_r_squares_first_wave_anchored_vsg_weighted[inc+1] = wiley_r_squared_first_wave_anchored_weighted(all_waves_PID7,2,inc,weights_VSG[2][[1]][[inc+1]])
}

ww1970_r_squares_first_wave_anchored_vsg_weighted

#Appending to data set
wiley_scatter_plot_vsg_weighted = data.frame(x=as.factor("VSG"),y=as.numeric(ww1970_r_squares_first_wave_anchored_vsg_weighted))
wiley_scatter_plot_vsg_weighted$num_months = as.numeric(c(60,67,76))

wiley_scatter_plot_weighted = rbind(wiley_scatter_plot_weighted,wiley_scatter_plot_vsg_weighted)

###ISCAP
##Creating data set [COMPLETE CASES ONLY]
##Proposed final nine measures INNER JOIN
waves67 = merge(wave6,wave7, by="MNO")
waves6710 = merge(waves67,wave10, by="MNO")
waves6710_11 = merge(waves6710,wave11, by="MNO")
waves6710_13 = merge(waves6710_11,wave13, by.x="MNO", by.y="mno")
waves6710_15 = merge(waves6710_13,wave15, by.x="MNO", by.y="mno")
waves6710_14 = merge(waves6710_15,wave14, by.x="MNO", by.y="mno")
waves6710_12 = merge(waves6710_14,wave12, by="MNO")
waves6710_15 = waves6710_12

##Filtering by rows
#Removing if PID7 value is not applicable
waves6710_15 = waves6710_15[waves6710_15$PID7_w6 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w7 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w12 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w14 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w15 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]

waves6710_15 = waves6710_15[waves6710_15$PID7_w10_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w11_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w13 %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]
waves6710_15 = waves6710_15[waves6710_15$PID7_w14_pre %in% c("Strong Democrat", "Not Strong Democrat", "Leans Democrat", "Undecided/Independent/Other", "Leans Republican", "Not Strong Republican", "Strong Republican"), ]

#Removing if pre-survey partisanship measure is not within the defined date range
waves6710_15 = waves6710_15[waves6710_15$pppadate_w10>=20150728 & waves6710_15$pppadate_w10<=20150903,] #The cut points could be extended to include more subjects
waves6710_15 = waves6710_15[waves6710_15$pppadate_w11>=20160715 & waves6710_15$pppadate_w11<=20160827,] #The cut points could be extended to include more subjects
waves6710_15 = waves6710_15[waves6710_15$pppadate_w14>=20190611 & waves6710_15$pppadate_w14<=20190820,] #The cut points could be extended to include more subjects

##Filtering by columns
all_waves_PID7 = waves6710_15[, c('PID7_w6','PID7_w7','PID7_w10_pre','PID7_w11_pre','PID7_w12','PID7_w13','PID7_w14_pre','PID7_w14','PID7_w15')]
all_waves_PID7$PID7_w10_pre = as.character(all_waves_PID7$PID7_w10_pre)
all_waves_PID7$PID7_w11_pre = as.character(all_waves_PID7$PID7_w11_pre)
all_waves_PID7$PID7_w13 = as.character(all_waves_PID7$PID7_w13)
all_waves_PID7$PID7_w14_pre = as.character(all_waves_PID7$PID7_w14_pre)

##Converting categorical values to numeric
all_waves_PID7 = convert_to_numeric(all_waves_PID7)
all_waves_PID7 = convert_to_numeric_2(all_waves_PID7)

summary(rowSums(is.na(all_waves_PID7))) #Checking that the correct type of cases remain

##Vector of weights
weights = waves6710_15[, c('weight_pre','PostWeight','weight','weight1.x','weight1','weight1.y')] #weight_pre is wave6, PostWeight is wave7, weight is wave10, weight1.x is wave11, weight1.y is wave13, and weight1 is wave12.

weights_ISCAP = list(seq(1,5,1),weights)

#Computing Implied Wiley-Wiley (1970) R^2s
ww1970_r_squares_first_wave_anchored_iscap_weighted = rep(NA,6) 
for (inc in 0:length(weights_ISCAP[1][[1]])){
  ww1970_r_squares_first_wave_anchored_iscap_weighted[inc+1] = wiley_r_squared_first_wave_anchored_weighted(all_waves_PID7,2,inc,weights_ISCAP[2][[1]][[inc+1]])
}

ww1970_r_squares_first_wave_anchored_iscap_weighted

#Appending to data set
wiley_scatter_plot_iscap_weighted = data.frame(x=as.factor("ISCAP"),y=as.numeric(ww1970_r_squares_first_wave_anchored_iscap_weighted))
wiley_scatter_plot_iscap_weighted$num_months = as.numeric(c(34,46,49,72,81,87))

wiley_scatter_plot_weighted = rbind(wiley_scatter_plot_weighted,wiley_scatter_plot_iscap_weighted)


###Creating Scatter Plot with Regression Line
wiley_scatter_plot_weighted[1,2] = 1.00
wiley_scatter_plot_weighted$num_years = wiley_scatter_plot_weighted$num_months/12
wiley_scatter_plot_weighted$years_log_scale = log(wiley_scatter_plot_weighted$num_months/12)
wiley_scatter_plot_weighted$inverse_log_years = exp(wiley_scatter_plot_weighted$years_log_scale)
wiley_scatter_plot_weighted$rate_of_decay = 0.975^(wiley_scatter_plot_weighted$inverse_log_years)
wiley_scatter_plot_weighted$log_R2 = log(wiley_scatter_plot_weighted$y)
wiley_scatter_plot_weighted$logged_rate_of_decay = log(wiley_scatter_plot_weighted$rate_of_decay)

#Writing to .csv
write.csv(wiley_scatter_plot_weighted,"./wiley_scatter_plot_weighted.csv", row.names = TRUE)

#Reading .csv
wiley_scatter_plot_weighted = read.csv("./wiley_scatter_plot_weighted.csv")

###Computing Regression Equation
lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_weighted) #Used for manual entry into the final line of the ggplot command
exp(-0.04272)

wiley_scatter_plot_total_weighted = rbind(wiley_scatter_plot_weighted,wiley_scatter_plot_gp1994)

lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_gp1994) #Used for manual entry into the final line of the ggplot command
exp(-0.07092)

###Final plot
ggplot(wiley_scatter_plot_total_weighted, aes(x=years_log_scale, y=log_R2)) +
  geom_point(aes(shape=x)) +
  #stat_smooth(method=lm, se=FALSE, color="black") + #Overall slope for all data
  scale_shape_manual(values=c(15,16,17,0,1,2,3,4,5)) +
  labs(x="Logged Years", y = bquote("Logged Wiley-Wiley" ~R^2)) +
  ggtitle("") +
  theme_classic() +
  theme(legend.title = element_blank(), legend.position="bottom") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(plot.title = element_text(size = 16)) + 
  theme(axis.title = element_text(size = 12)) +
  theme(legend.text = element_text(size = 12)) + 
  geom_abline(intercept = -0.04272, slope = -0.05613, color="gray", linetype="solid", size=1) + #Slope for three main panels
  geom_abline(intercept = -0.07092, slope = -0.03813, color="gray", linetype="dashed", size=1) + #Slope for G&P 1994 panels
  scale_y_continuous(
    # Add a second axis and specify its features
    sec.axis = sec_axis(trans=~exp(.), name=bquote("Wiley-Wiley" ~R^2)))

#Saving in high resolution
ggsave(filename = "./Wiley_Scatter_Plot_Graphic_Weighted_090922.png", device='png', width=6.696667, height=3.53, units=c("in"), dpi=600)


###Test of statistical significance
wiley_scatter_plot_total_weighted$older = as.factor(c(rep(0,30),rep(1,14)))
summary(lm(log_R2 ~ years_log_scale + older, data = wiley_scatter_plot_total_weighted))
summary(lm(y ~ num_years + older, data = wiley_scatter_plot_total_weighted))
summary(lm(y ~ num_years*older, data = wiley_scatter_plot_total_weighted))
summary(lm(log_R2 ~ years_log_scale*older, data = wiley_scatter_plot_total_weighted))
##No significance on the older variable or on the interaction term. Thus, not statistically different.

##F-Test
pooled = lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_total_weighted)
pooled_SSR = sum(resid(pooled)^2)

old = lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_total_weighted, subset = older==1)
old_SSR = sum(resid(old)^2)

new = lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_total_weighted, subset = older==0)
new_SSR = sum(resid(new)^2)

summed_SSR = old_SSR + new_SSR

f_statistic = pooled_SSR/summed_SSR #1.05, df = 2 and 8? Does not fall beneath threshold: https://www.statisticshowto.com/tables/f-table/


###Testing out full Green and Palmquist (1994) results, not just national
wiley_scatter_plot_gp1994_full = as.data.frame(matrix(nrow=23,ncol=3,dimnames=list(seq(1,23,1),c("x","y","num_months"))))
wiley_scatter_plot_gp1994_full$x = as.factor(c(rep("ANES-56-58-60",3),rep("ANES-72-74-76",2),rep("ANES-80",3),rep("ANES-84-85",2),rep("ANES-90-91-92",2),rep("Parent",2),rep("Youth",2),rep("Erie",4),rep("Terman",3)))
#wiley_scatter_plot_gp1994_full$y = as.numeric(c(.917,.936,.918,.942,.968,.962,.977,.950,.976,1,1,.866,.891,.963))
#wiley_scatter_plot_gp1994_full$num_months = c(24,24,3,24,24,5,3,3,3,14,7,15,96,108)
wiley_scatter_plot_gp1994_full$y = as.numeric(c(.917,.917*.936,.917*.936*.918,.942,.942*.968,.962,.962*.977,.962*.977*.950,.976,.976*1,1,1*.866,.891,.891*.963,.609,.609*.853,.923,.923*.978,.923*.978*.982,.923*.978*.982*.997,.837,.837*.841,.837*.841*.648))
wiley_scatter_plot_gp1994_full$num_months = c(24,48,51,24,48,5,8,11,3,17,7,22,96,204,96,204,1.5,3,4.5,6,120,240,444)
wiley_scatter_plot_gp1994_full$num_years = wiley_scatter_plot_gp1994_full$num_months/12
wiley_scatter_plot_gp1994_full$years_log_scale = log(wiley_scatter_plot_gp1994_full$num_months/12)
wiley_scatter_plot_gp1994_full$inverse_log_years = exp(wiley_scatter_plot_gp1994_full$years_log_scale)
wiley_scatter_plot_gp1994_full$rate_of_decay = 0.975^(wiley_scatter_plot_gp1994_full$inverse_log_years)
wiley_scatter_plot_gp1994_full$log_R2 = log(wiley_scatter_plot_gp1994_full$y)
wiley_scatter_plot_gp1994_full$logged_rate_of_decay = log(wiley_scatter_plot_gp1994_full$rate_of_decay)

lm(log_R2 ~ years_log_scale, data = wiley_scatter_plot_gp1994_full) #Used for manual entry into the final line of the ggplot command

exp(-.11852) #.888



####Creating a Macropartisanship Time Series Plot
#Gallup
gallup_macro_pid = read.csv("./gallup_macro_data.csv")
gallup_macro_pid = gallup_macro_pid[,c(1,2,3)]
gallup_macro_pid$day = 15
library("stringr")
gallup_macro_pid$month = ifelse(str_sub(gallup_macro_pid$date,-1,-1)==1,2,ifelse(str_sub(gallup_macro_pid$date,-1,-1)==2,5,ifelse(str_sub(gallup_macro_pid$date,-1,-1)==3,8,ifelse(str_sub(gallup_macro_pid$date,-1,-1)==4,11,NA))))
table(gallup_macro_pid$month)
library("lubridate")
gallup_macro_pid$date_revised = make_date(gallup_macro_pid$year,gallup_macro_pid$month,gallup_macro_pid$day)
gallup_macro_pid$date_revised = as.Date(gallup_macro_pid$date_revised)
gallup_macro_pid = subset(gallup_macro_pid, year>=2011 & year<=2020)
gallup_macro_pid$panel = "Gallup"
gallup_macro_pid = gallup_macro_pid[,c("macropartisanship","date_revised","panel")]

#TAPS (Run complete cases script)
TAPS_macro_pid = as.data.frame(round(apply(complete_cases_PID3, 2, macroP),3))
colnames(TAPS_macro_pid) <- c("macropartisanship")
TAPS_macro_pid$date_revised = c(as.Date("2011-11-15"),as.Date("2012-04-15"),as.Date("2012-06-15"),as.Date("2012-09-15"),as.Date("2012-10-15"),as.Date("2012-11-15"),as.Date("2013-04-15"),as.Date("2013-05-15"),as.Date("2013-07-15"),as.Date("2013-12-15"),as.Date("2014-03-15"),as.Date("2014-06-15"),as.Date("2014-09-15"),as.Date("2014-11-15"),as.Date("2015-01-15"),as.Date("2015-07-15"),as.Date("2015-09-15"),as.Date("2016-01-15"),as.Date("2016-02-15"),as.Date("2016-05-15"),as.Date("2016-12-15"),as.Date("2017-04-15"),as.Date("2017-07-15"),as.Date("2018-01-15"))
TAPS_macro_pid$panel = "TAPS"

#ISCAP (Run complete cases script)
ISCAP_macro_pid = as.data.frame(round(apply(all_waves_PID3, 2, macroP),3))
colnames(ISCAP_macro_pid) <- c("macropartisanship")
ISCAP_macro_pid$date_revised = c(as.Date("2012-10-24"),as.Date("2012-12-21"),as.Date("2015-08-14"),as.Date("2016-08-06"),as.Date("2016-12-02"),as.Date("2018-10-30"),as.Date("2019-07-15"),as.Date("2020-01-28"),as.Date("2020-10-15"))
ISCAP_macro_pid$panel = "ISCAP"

#VSG (Run complete cases script)
VSG_macro_pid = as.data.frame(round(apply(all_waves_PID3, 2, macroP),3))
colnames(VSG_macro_pid) <- c("macropartisanship")
VSG_macro_pid$date_revised = c(as.Date("2011-12-15"),as.Date("2012-11-15"),as.Date("2016-12-14"),as.Date("2017-07-19"),as.Date("2018-04-25"),as.Date("2018-12-12"))
VSG_macro_pid$panel = "VSG"

#Combining data
macro_pid_data = rbind(gallup_macro_pid,TAPS_macro_pid,ISCAP_macro_pid,VSG_macro_pid)

#Writing to .csv
write.csv(macro_pid_data,"./Macropartisanship_line_chart_052622.csv")
macro_pid_data = read.csv("./Macropartisanship_line_chart_052622.csv")

##Visualizing
ggplot(macro_pid_data, aes(x=date_revised, y=macropartisanship, group=panel)) +
  geom_line(aes(linetype=panel)) + 
  geom_point(aes(shape=panel)) +
  labs(x="Year", y = "Macropartisanship") +
  ylim(0.45,0.65) + 
  scale_x_date(date_breaks="1 year", date_labels="%Y") + 
  ggtitle("") +
  theme_classic() +
  theme(legend.title = element_blank(),
        legend.position="bottom",
        plot.title = element_text(hjust = 0.5, size=20),
        axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12),
        legend.text = element_text(size=12))

#Saving in high resolution
ggsave(filename = "./Macropartisanship_line_chart_090922.png", device='png', width=9.081667, height=5.03, units = c("in"), dpi=600)


##Regressing macro_pid on time with fixed effects for panel
macro_pid_data$years_since_origin = as.numeric(as.Date(macro_pid_data$date_revised,tryFormats = c("%m/%d/%Y")) - as.Date("2/15/2011",tryFormats = c("%m/%d/%Y")))/365

summary(lm(macropartisanship ~ years_since_origin + as.factor(panel), data = macro_pid_data))